summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-01-12 09:10:34 -0800
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-12 09:10:34 -0800
commit 9f5974c8734d83d4ab7096ed98136a82f41210d6 (patch)
tree 6f328555796bafefb74936ab68128aa84efd28b1
parent a2d823bf13efea4c859376f6e85c49cfbad7ab60 (diff)
parent ddae9c2ea79449beb00027cf77fca6dc489f2d15 (diff)
Merge git://oss.sgi.com:8090/oss/git/xfs-2.6
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 1088
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.h 10
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 1373
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 696
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 6
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c 10
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 121
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.h 5
-rw-r--r-- fs/xfs/linux-2.6/xfs_linux.h 6
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 56
-rw-r--r-- fs/xfs/linux-2.6/xfs_stats.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_stats.h 18
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 19
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.c 1
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 19
-rw-r--r-- fs/xfs/quota/xfs_dquot_item.c 4
-rw-r--r-- fs/xfs/quota/xfs_qm.c 18
-rw-r--r-- fs/xfs/support/debug.c 60
-rw-r--r-- fs/xfs/support/debug.h 25
-rw-r--r-- fs/xfs/support/uuid.c 23
-rw-r--r-- fs/xfs/xfs_arch.h 22
-rw-r--r-- fs/xfs/xfs_attr_leaf.c 12
-rw-r--r-- fs/xfs/xfs_attr_leaf.h 79
-rw-r--r-- fs/xfs/xfs_bmap.c 412
-rw-r--r-- fs/xfs/xfs_bmap.h 7
-rw-r--r-- fs/xfs/xfs_clnt.h 2
-rw-r--r-- fs/xfs/xfs_dfrag.c 16
-rw-r--r-- fs/xfs/xfs_dinode.h 22
-rw-r--r-- fs/xfs/xfs_dir.c 2
-rw-r--r-- fs/xfs/xfs_dir.h 2
-rw-r--r-- fs/xfs/xfs_dir2.h 3
-rw-r--r-- fs/xfs/xfs_dir_leaf.h 64
-rw-r--r-- fs/xfs/xfs_error.c 1
-rw-r--r-- fs/xfs/xfs_error.h 8
-rw-r--r-- fs/xfs/xfs_fs.h 10
-rw-r--r-- fs/xfs/xfs_fsops.c 26
-rw-r--r-- fs/xfs/xfs_fsops.h 1
-rw-r--r-- fs/xfs/xfs_iget.c 5
-rw-r--r-- fs/xfs/xfs_inode.c 61
-rw-r--r-- fs/xfs/xfs_inode.h 4
-rw-r--r-- fs/xfs/xfs_inode_item.c 9
-rw-r--r-- fs/xfs/xfs_iomap.c 425
-rw-r--r-- fs/xfs/xfs_itable.c 5
-rw-r--r-- fs/xfs/xfs_log.c 123
-rw-r--r-- fs/xfs/xfs_log.h 11
-rw-r--r-- fs/xfs/xfs_log_priv.h 77
-rw-r--r-- fs/xfs/xfs_log_recover.c 12
-rw-r--r-- fs/xfs/xfs_mount.c 5
-rw-r--r-- fs/xfs/xfs_mount.h 3
-rw-r--r-- fs/xfs/xfs_rename.c 7
-rw-r--r-- fs/xfs/xfs_rw.c 9
-rw-r--r-- fs/xfs/xfs_sb.h 17
-rw-r--r-- fs/xfs/xfs_trans.c 14
-rw-r--r-- fs/xfs/xfs_trans.h 1
-rw-r--r-- fs/xfs/xfs_utils.c 9
-rw-r--r-- fs/xfs/xfs_vfsops.c 50
-rw-r--r-- fs/xfs/xfs_vnodeops.c 193
-rw-r--r-- mm/swap.c 2
58 files changed, 2719 insertions, 2572 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b..d1db8c17a74 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
#include "xfs_rw.h"
#include "xfs_iomap.h"
#include <linux/mpage.h>
+#include <linux/pagevec.h>
#include <linux/writeback.h>
STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
- struct writeback_control *wbc, void *, int, int);
#if defined(XFS_RW_TRACE)
void
@@ -55,17 +54,15 @@ xfs_page_trace(
int mask)
{
xfs_inode_t *ip;
- bhv_desc_t *bdp;
vnode_t *vp = LINVFS_GET_VP(inode);
loff_t isize = i_size_read(inode);
- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ loff_t offset = page_offset(page);
int delalloc = -1, unmapped = -1, unwritten = -1;
if (page_has_buffers(page))
xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
- ip = XFS_BHVTOI(bdp);
+ ip = xfs_vtoi(vp);
if (!ip->i_rwtrace)
return;
@@ -103,15 +100,56 @@ xfs_finish_ioend(
queue_work(xfsdatad_workqueue, &ioend->io_work);
}
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory. Do not use the ioend after this.
+ */
STATIC void
xfs_destroy_ioend(
xfs_ioend_t *ioend)
{
+ struct buffer_head *bh, *next;
+
+ for (bh = ioend->io_buffer_head; bh; bh = next) {
+ next = bh->b_private;
+ bh->b_end_io(bh, ioend->io_uptodate);
+ }
+
vn_iowake(ioend->io_vnode);
mempool_free(ioend, xfs_ioend_pool);
}
/*
+ * Buffered IO write completion for delayed allocate extents.
+ * TODO: Update ondisk isize now that we know the file data
+ * has been flushed (i.e. the notorious "NULL file" problem).
+ */
+STATIC void
+xfs_end_bio_delalloc(
+ void *data)
+{
+ xfs_ioend_t *ioend = data;
+
+ xfs_destroy_ioend(ioend);
+}
+
+/*
+ * Buffered IO write completion for regular, written extents.
+ */
+STATIC void
+xfs_end_bio_written(
+ void *data)
+{
+ xfs_ioend_t *ioend = data;
+
+ xfs_destroy_ioend(ioend);
+}
+
+/*
+ * IO write completion for unwritten extents.
+ *
* Issue transactions to convert a buffer range from unwritten
* to written extents.
*/
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
vnode_t *vp = ioend->io_vnode;
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
- struct buffer_head *bh, *next;
int error;
if (ioend->io_uptodate)
VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-
- /* ioend->io_buffer_head is only non-NULL for buffered I/O */
- for (bh = ioend->io_buffer_head; bh; bh = next) {
- next = bh->b_private;
-
- bh->b_end_io = NULL;
- clear_buffer_unwritten(bh);
- end_buffer_async_write(bh, ioend->io_uptodate);
- }
-
xfs_destroy_ioend(ioend);
}
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
*/
STATIC xfs_ioend_t *
xfs_alloc_ioend(
- struct inode *inode)
+ struct inode *inode,
+ unsigned int type)
{
xfs_ioend_t *ioend;
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_uptodate = 1; /* cleared if any I/O fails */
+ ioend->io_list = NULL;
+ ioend->io_type = type;
ioend->io_vnode = LINVFS_GET_VP(inode);
ioend->io_buffer_head = NULL;
+ ioend->io_buffer_tail = NULL;
atomic_inc(&ioend->io_vnode->v_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
- INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+ if (type == IOMAP_UNWRITTEN)
+ INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+ else if (type == IOMAP_DELAY)
+ INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+ else
+ INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
return ioend;
}
-void
-linvfs_unwritten_done(
- struct buffer_head *bh,
- int uptodate)
-{
- xfs_ioend_t *ioend = bh->b_private;
- static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
- unsigned long flags;
-
- ASSERT(buffer_unwritten(bh));
- bh->b_end_io = NULL;
-
- if (!uptodate)
- ioend->io_uptodate = 0;
-
- /*
- * Deep magic here. We reuse b_private in the buffer_heads to build
- * a chain for completing the I/O from user context after we've issued
- * a transaction to convert the unwritten extent.
- */
- spin_lock_irqsave(&unwritten_done_lock, flags);
- bh->b_private = ioend->io_buffer_head;
- ioend->io_buffer_head = bh;
- spin_unlock_irqrestore(&unwritten_done_lock, flags);
-
- xfs_finish_ioend(ioend);
-}
-
STATIC int
xfs_map_blocks(
struct inode *inode,
@@ -218,138 +226,260 @@ xfs_map_blocks(
return -error;
}
+STATIC inline int
+xfs_iomap_valid(
+ xfs_iomap_t *iomapp,
+ loff_t offset)
+{
+ return offset >= iomapp->iomap_offset &&
+ offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+}
+
/*
- * Finds the corresponding mapping in block @map array of the
- * given @offset within a @page.
+ * BIO completion handler for buffered IO.
*/
-STATIC xfs_iomap_t *
-xfs_offset_to_map(
+STATIC int
+xfs_end_bio(
+ struct bio *bio,
+ unsigned int bytes_done,
+ int error)
+{
+ xfs_ioend_t *ioend = bio->bi_private;
+
+ if (bio->bi_size)
+ return 1;
+
+ ASSERT(ioend);
+ ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+
+ /* Toss bio and pass work off to an xfsdatad thread */
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ ioend->io_uptodate = 0;
+ bio->bi_private = NULL;
+ bio->bi_end_io = NULL;
+
+ bio_put(bio);
+ xfs_finish_ioend(ioend);
+ return 0;
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+ xfs_ioend_t *ioend,
+ struct bio *bio)
+{
+ atomic_inc(&ioend->io_remaining);
+
+ bio->bi_private = ioend;
+ bio->bi_end_io = xfs_end_bio;
+
+ submit_bio(WRITE, bio);
+ ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
+ bio_put(bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+ struct buffer_head *bh)
+{
+ struct bio *bio;
+ int nvecs = bio_get_nr_vecs(bh->b_bdev);
+
+ do {
+ bio = bio_alloc(GFP_NOIO, nvecs);
+ nvecs >>= 1;
+ } while (!bio);
+
+ ASSERT(bio->bi_private == NULL);
+ bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ bio_get(bio);
+ return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+ struct buffer_head *bh)
+{
+ ASSERT(buffer_mapped(bh));
+ ASSERT(buffer_locked(bh));
+ ASSERT(!buffer_delay(bh));
+ ASSERT(!buffer_unwritten(bh));
+
+ mark_buffer_async_write(bh);
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
struct page *page,
- xfs_iomap_t *iomapp,
- unsigned long offset)
+ struct writeback_control *wbc,
+ int clear_dirty,
+ int buffers)
+{
+ ASSERT(PageLocked(page));
+ ASSERT(!PageWriteback(page));
+ set_page_writeback(page);
+ if (clear_dirty)
+ clear_page_dirty(page);
+ unlock_page(page);
+ if (!buffers) {
+ end_page_writeback(page);
+ wbc->pages_skipped++; /* We didn't write this page */
+ }
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+ return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up,
+ * covering the initial writepage page and also any probed pages.
+ */
+STATIC void
+xfs_submit_ioend(
+ xfs_ioend_t *ioend)
+{
+ xfs_ioend_t *next;
+ struct buffer_head *bh;
+ struct bio *bio;
+ sector_t lastblock = 0;
+
+ do {
+ next = ioend->io_list;
+ bio = NULL;
+
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+ xfs_start_buffer_writeback(bh);
+
+ if (!bio) {
+ retry:
+ bio = xfs_alloc_ioend_bio(bh);
+ } else if (bh->b_blocknr != lastblock + 1) {
+ xfs_submit_ioend_bio(ioend, bio);
+ goto retry;
+ }
+
+ if (bio_add_buffer(bio, bh) != bh->b_size) {
+ xfs_submit_ioend_bio(ioend, bio);
+ goto retry;
+ }
+
+ lastblock = bh->b_blocknr;
+ }
+ if (bio)
+ xfs_submit_ioend_bio(ioend, bio);
+ xfs_finish_ioend(ioend);
+ } while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too. Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+ xfs_ioend_t *ioend)
+{
+ xfs_ioend_t *next;
+ struct buffer_head *bh, *next_bh;
+
+ do {
+ next = ioend->io_list;
+ bh = ioend->io_buffer_head;
+ do {
+ next_bh = bh->b_private;
+ clear_buffer_async_write(bh);
+ unlock_buffer(bh);
+ } while ((bh = next_bh) != NULL);
+
+ vn_iowake(ioend->io_vnode);
+ mempool_free(ioend, xfs_ioend_pool);
+ } while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+ struct inode *inode,
+ struct buffer_head *bh,
+ xfs_off_t offset,
+ unsigned int type,
+ xfs_ioend_t **result,
+ int need_ioend)
{
- loff_t full_offset; /* offset from start of file */
+ xfs_ioend_t *ioend = *result;
- ASSERT(offset < PAGE_CACHE_SIZE);
+ if (!ioend || need_ioend || type != ioend->io_type) {
+ xfs_ioend_t *previous = *result;
- full_offset = page->index; /* NB: using 64bit number */
- full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */
- full_offset += offset; /* offset from page start */
+ ioend = xfs_alloc_ioend(inode, type);
+ ioend->io_offset = offset;
+ ioend->io_buffer_head = bh;
+ ioend->io_buffer_tail = bh;
+ if (previous)
+ previous->io_list = ioend;
+ *result = ioend;
+ } else {
+ ioend->io_buffer_tail->b_private = bh;
+ ioend->io_buffer_tail = bh;
+ }
- if (full_offset < iomapp->iomap_offset)
- return NULL;
- if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
- return iomapp;
- return NULL;
+ bh->b_private = NULL;
+ ioend->io_size += bh->b_size;
}
STATIC void
xfs_map_at_offset(
- struct page *page,
struct buffer_head *bh,
- unsigned long offset,
+ loff_t offset,
int block_bits,
xfs_iomap_t *iomapp)
{
xfs_daddr_t bn;
- loff_t delta;
int sector_shift;
ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
- delta = page->index;
- delta <<= PAGE_CACHE_SHIFT;
- delta += offset;
- delta -= iomapp->iomap_offset;
- delta >>= block_bits;
-
sector_shift = block_bits - BBSHIFT;
- bn = iomapp->iomap_bn >> sector_shift;
- bn += delta;
- BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+ bn = (iomapp->iomap_bn >> sector_shift) +
+ ((offset - iomapp->iomap_offset) >> block_bits);
+
+ ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
lock_buffer(bh);
bh->b_blocknr = bn;
- bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+ bh->b_bdev = iomapp->iomap_target->bt_bdev;
set_buffer_mapped(bh);
clear_buffer_delay(bh);
+ clear_buffer_unwritten(bh);
}
/*
- * Look for a page at index which is unlocked and contains our
- * unwritten extent flagged buffers at its head. Returns page
- * locked and with an extra reference count, and length of the
- * unwritten extent component on this page that we can write,
- * in units of filesystem blocks.
- */
-STATIC struct page *
-xfs_probe_unwritten_page(
- struct address_space *mapping,
- pgoff_t index,
- xfs_iomap_t *iomapp,
- xfs_ioend_t *ioend,
- unsigned long max_offset,
- unsigned long *fsbs,
- unsigned int bbits)
-{
- struct page *page;
-
- page = find_trylock_page(mapping, index);
- if (!page)
- return NULL;
- if (PageWriteback(page))
- goto out;
-
- if (page->mapping && page_has_buffers(page)) {
- struct buffer_head *bh, *head;
- unsigned long p_offset = 0;
-
- *fsbs = 0;
- bh = head = page_buffers(page);
- do {
- if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
- break;
- if (!xfs_offset_to_map(page, iomapp, p_offset))
- break;
- if (p_offset >= max_offset)
- break;
- xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
- set_buffer_unwritten_io(bh);
- bh->b_private = ioend;
- p_offset += bh->b_size;
- (*fsbs)++;
- } while ((bh = bh->b_this_page) != head);
-
- if (p_offset)
- return page;
- }
-
-out:
- unlock_page(page);
- return NULL;
-}
-
-/*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
+ * Look for a page at index that is suitable for clustering.
*/
STATIC unsigned int
-xfs_probe_unmapped_page(
- struct address_space *mapping,
- pgoff_t index,
- unsigned int pg_offset)
+xfs_probe_page(
+ struct page *page,
+ unsigned int pg_offset,
+ int mapped)
{
- struct page *page;
int ret = 0;
- page = find_trylock_page(mapping, index);
- if (!page)
- return 0;
if (PageWriteback(page))
- goto out;
+ return 0;
if (page->mapping && PageDirty(page)) {
if (page_has_buffers(page)) {
@@ -357,79 +487,101 @@ xfs_probe_unmapped_page(
bh = head = page_buffers(page);
do {
- if (buffer_mapped(bh) || !buffer_uptodate(bh))
+ if (!buffer_uptodate(bh))
+ break;
+ if (mapped != buffer_mapped(bh))
break;
ret += bh->b_size;
if (ret >= pg_offset)
break;
} while ((bh = bh->b_this_page) != head);
} else
- ret = PAGE_CACHE_SIZE;
+ ret = mapped ? 0 : PAGE_CACHE_SIZE;
}
-out:
- unlock_page(page);
return ret;
}
-STATIC unsigned int
-xfs_probe_unmapped_cluster(
+STATIC size_t
+xfs_probe_cluster(
struct inode *inode,
struct page *startpage,
struct buffer_head *bh,
- struct buffer_head *head)
+ struct buffer_head *head,
+ int mapped)
{
+ struct pagevec pvec;
pgoff_t tindex, tlast, tloff;
- unsigned int pg_offset, len, total = 0;
- struct address_space *mapping = inode->i_mapping;
+ size_t total = 0;
+ int done = 0, i;
/* First sum forwards in this page */
do {
- if (buffer_mapped(bh))
- break;
+ if (mapped != buffer_mapped(bh))
+ return total;
total += bh->b_size;
} while ((bh = bh->b_this_page) != head);
- /* If we reached the end of the page, sum forwards in
- * following pages.
- */
- if (bh == head) {
- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
- /* Prune this back to avoid pathological behavior */
- tloff = min(tlast, startpage->index + 64);
- for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
- len = xfs_probe_unmapped_page(mapping, tindex,
- PAGE_CACHE_SIZE);
- if (!len)
- return total;
+ /* if we reached the end of the page, sum forwards in following pages */
+ tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+ tindex = startpage->index + 1;
+
+ /* Prune this back to avoid pathological behavior */
+ tloff = min(tlast, startpage->index + 64);
+
+ pagevec_init(&pvec, 0);
+ while (!done && tindex <= tloff) {
+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+ break;
+
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+ size_t pg_offset, len = 0;
+
+ if (tindex == tlast) {
+ pg_offset =
+ i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+ if (!pg_offset) {
+ done = 1;
+ break;
+ }
+ } else
+ pg_offset = PAGE_CACHE_SIZE;
+
+ if (page->index == tindex && !TestSetPageLocked(page)) {
+ len = xfs_probe_page(page, pg_offset, mapped);
+ unlock_page(page);
+ }
+
+ if (!len) {
+ done = 1;
+ break;
+ }
+
total += len;
+ tindex++;
}
- if (tindex == tlast &&
- (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
- total += xfs_probe_unmapped_page(mapping,
- tindex, pg_offset);
- }
+
+ pagevec_release(&pvec);
+ cond_resched();
}
+
return total;
}
/*
- * Probe for a given page (index) in the inode and test if it is delayed
- * and without unwritten buffers. Returns page locked and with an extra
- * reference count.
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
*/
-STATIC struct page *
-xfs_probe_delalloc_page(
- struct inode *inode,
- pgoff_t index)
+STATIC int
+xfs_is_delayed_page(
+ struct page *page,
+ unsigned int type)
{
- struct page *page;
-
- page = find_trylock_page(inode->i_mapping, index);
- if (!page)
- return NULL;
if (PageWriteback(page))
- goto out;
+ return 0;
if (page->mapping && page_has_buffers(page)) {
struct buffer_head *bh, *head;
@@ -437,243 +589,156 @@ xfs_probe_delalloc_page(
bh = head = page_buffers(page);
do {
- if (buffer_unwritten(bh)) {
- acceptable = 0;
+ if (buffer_unwritten(bh))
+ acceptable = (type == IOMAP_UNWRITTEN);
+ else if (buffer_delay(bh))
+ acceptable = (type == IOMAP_DELAY);
+ else if (buffer_mapped(bh))
+ acceptable = (type == 0);
+ else
break;
- } else if (buffer_delay(bh)) {
- acceptable = 1;
- }
} while ((bh = bh->b_this_page) != head);
if (acceptable)
- return page;
- }
-
-out:
- unlock_page(page);
- return NULL;
-}
-
-STATIC int
-xfs_map_unwritten(
- struct inode *inode,
- struct page *start_page,
- struct buffer_head *head,
- struct buffer_head *curr,
- unsigned long p_offset,
- int block_bits,
- xfs_iomap_t *iomapp,
- struct writeback_control *wbc,
- int startio,
- int all_bh)
-{
- struct buffer_head *bh = curr;
- xfs_iomap_t *tmp;
- xfs_ioend_t *ioend;
- loff_t offset;
- unsigned long nblocks = 0;
-
- offset = start_page->index;
- offset <<= PAGE_CACHE_SHIFT;
- offset += p_offset;
-
- ioend = xfs_alloc_ioend(inode);
-
- /* First map forwards in the page consecutive buffers
- * covering this unwritten extent
- */
- do {
- if (!buffer_unwritten(bh))
- break;
- tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
- if (!tmp)
- break;
- xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
- set_buffer_unwritten_io(bh);
- bh->b_private = ioend;
- p_offset += bh->b_size;
- nblocks++;
- } while ((bh = bh->b_this_page) != head);
-
- atomic_add(nblocks, &ioend->io_remaining);
-
- /* If we reached the end of the page, map forwards in any
- * following pages which are also covered by this extent.
- */
- if (bh == head) {
- struct address_space *mapping = inode->i_mapping;
- pgoff_t tindex, tloff, tlast;
- unsigned long bs;
- unsigned int pg_offset, bbits = inode->i_blkbits;
- struct page *page;
-
- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
- tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
- tloff = min(tlast, tloff);
- for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
- page = xfs_probe_unwritten_page(mapping,
- tindex, iomapp, ioend,
- PAGE_CACHE_SIZE, &bs, bbits);
- if (!page)
- break;
- nblocks += bs;
- atomic_add(bs, &ioend->io_remaining);
- xfs_convert_page(inode, page, iomapp, wbc, ioend,
- startio, all_bh);
- /* stop if converting the next page might add
- * enough blocks that the corresponding byte
- * count won't fit in our ulong page buf length */
- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
- goto enough;
- }
-
- if (tindex == tlast &&
- (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
- page = xfs_probe_unwritten_page(mapping,
- tindex, iomapp, ioend,
- pg_offset, &bs, bbits);
- if (page) {
- nblocks += bs;
- atomic_add(bs, &ioend->io_remaining);
- xfs_convert_page(inode, page, iomapp, wbc, ioend,
- startio, all_bh);
- if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
- goto enough;
- }
- }
+ return 1;
}
-enough:
- ioend->io_size = (xfs_off_t)nblocks << block_bits;
- ioend->io_offset = offset;
- xfs_finish_ioend(ioend);
return 0;
}
-STATIC void
-xfs_submit_page(
- struct page *page,
- struct writeback_control *wbc,
- struct buffer_head *bh_arr[],
- int bh_count,
- int probed_page,
- int clear_dirty)
-{
- struct buffer_head *bh;
- int i;
-
- BUG_ON(PageWriteback(page));
- if (bh_count)
- set_page_writeback(page);
- if (clear_dirty)
- clear_page_dirty(page);
- unlock_page(page);
-
- if (bh_count) {
- for (i = 0; i < bh_count; i++) {
- bh = bh_arr[i];
- mark_buffer_async_write(bh);
- if (buffer_unwritten(bh))
- set_buffer_unwritten_io(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- }
-
- for (i = 0; i < bh_count; i++)
- submit_bh(WRITE, bh_arr[i]);
-
- if (probed_page && clear_dirty)
- wbc->nr_to_write--; /* Wrote an "extra" page */
- }
-}
-
/*
* Allocate & map buffers for page given the extent map. Write it out.
* except for the original page of a writepage, this is called on
* delalloc/unwritten pages only, for the original page it is possible
* that the page has no mapping at all.
*/
-STATIC void
+STATIC int
xfs_convert_page(
struct inode *inode,
struct page *page,
- xfs_iomap_t *iomapp,
+ loff_t tindex,
+ xfs_iomap_t *mp,
+ xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- void *private,
int startio,
int all_bh)
{
- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
- xfs_iomap_t *mp = iomapp, *tmp;
- unsigned long offset, end_offset;
- int index = 0;
+ struct buffer_head *bh, *head;
+ xfs_off_t end_offset;
+ unsigned long p_offset;
+ unsigned int type;
int bbits = inode->i_blkbits;
int len, page_dirty;
+ int count = 0, done = 0, uptodate = 1;
+ xfs_off_t offset = page_offset(page);
- end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+ if (page->index != tindex)
+ goto fail;
+ if (TestSetPageLocked(page))
+ goto fail;
+ if (PageWriteback(page))
+ goto fail_unlock_page;
+ if (page->mapping != inode->i_mapping)
+ goto fail_unlock_page;
+ if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+ goto fail_unlock_page;
/*
* page_dirty is initially a count of buffers on the page before
* EOF and is decremented as we move each into a cleanable state.
+ *
+ * Derivation:
+ *
+ * End offset is the highest offset that this page should represent.
+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+ * hence give us the correct page_dirty count. On any other page,
+ * it will be zero and in that case we need page_dirty to be the
+ * count of buffers on the page.
*/
+ end_offset = min_t(unsigned long long,
+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+ i_size_read(inode));
+
len = 1 << inode->i_blkbits;
- end_offset = max(end_offset, PAGE_CACHE_SIZE);
- end_offset = roundup(end_offset, len);
- page_dirty = end_offset / len;
+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+ PAGE_CACHE_SIZE);
+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+ page_dirty = p_offset / len;
- offset = 0;
bh = head = page_buffers(page);
do {
if (offset >= end_offset)
break;
- if (!(PageUptodate(page) || buffer_uptodate(bh)))
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+ if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+ done = 1;
continue;
- if (buffer_mapped(bh) && all_bh &&
- !(buffer_unwritten(bh) || buffer_delay(bh))) {
+ }
+
+ if (buffer_unwritten(bh) || buffer_delay(bh)) {
+ if (buffer_unwritten(bh))
+ type = IOMAP_UNWRITTEN;
+ else
+ type = IOMAP_DELAY;
+
+ if (!xfs_iomap_valid(mp, offset)) {
+ done = 1;
+ continue;
+ }
+
+ ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+ ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+
+ xfs_map_at_offset(bh, offset, bbits, mp);
if (startio) {
+ xfs_add_to_ioend(inode, bh, offset,
+ type, ioendp, done);
+ } else {
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty(bh);
+ }
+ page_dirty--;
+ count++;
+ } else {
+ type = 0;
+ if (buffer_mapped(bh) && all_bh && startio) {
lock_buffer(bh);
- bh_arr[index++] = bh;
+ xfs_add_to_ioend(inode, bh, offset,
+ type, ioendp, done);
+ count++;
page_dirty--;
+ } else {
+ done = 1;
}
- continue;
}
- tmp = xfs_offset_to_map(page, mp, offset);
- if (!tmp)
- continue;
- ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
- ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
+ } while (offset += len, (bh = bh->b_this_page) != head);
- /* If this is a new unwritten extent buffer (i.e. one
- * that we haven't passed in private data for, we must
- * now map this buffer too.
- */
- if (buffer_unwritten(bh) && !bh->b_end_io) {
- ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
- xfs_map_unwritten(inode, page, head, bh, offset,
- bbits, tmp, wbc, startio, all_bh);
- } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
- xfs_map_at_offset(page, bh, offset, bbits, tmp);
- if (buffer_unwritten(bh)) {
- set_buffer_unwritten_io(bh);
- bh->b_private = private;
- ASSERT(private);
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ if (startio) {
+ if (count) {
+ struct backing_dev_info *bdi;
+
+ bdi = inode->i_mapping->backing_dev_info;
+ if (bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ done = 1;
+ } else if (--wbc->nr_to_write <= 0) {
+ done = 1;
}
}
- if (startio) {
- bh_arr[index++] = bh;
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
- } while (offset += len, (bh = bh->b_this_page) != head);
-
- if (startio && index) {
- xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
- } else {
- unlock_page(page);
+ xfs_start_page_writeback(page, wbc, !page_dirty, count);
}
+
+ return done;
+ fail_unlock_page:
+ unlock_page(page);
+ fail:
+ return 1;
}
/*
@@ -685,19 +750,31 @@ xfs_cluster_write(
struct inode *inode,
pgoff_t tindex,
xfs_iomap_t *iomapp,
+ xfs_ioend_t **ioendp,
struct writeback_control *wbc,
int startio,
int all_bh,
pgoff_t tlast)
{
- struct page *page;
+ struct pagevec pvec;
+ int done = 0, i;
- for (; tindex <= tlast; tindex++) {
- page = xfs_probe_delalloc_page(inode, tindex);
- if (!page)
+ pagevec_init(&pvec, 0);
+ while (!done && tindex <= tlast) {
+ unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+ if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
break;
- xfs_convert_page(inode, page, iomapp, wbc, NULL,
- startio, all_bh);
+
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+ iomapp, ioendp, wbc, startio, all_bh);
+ if (done)
+ break;
+ }
+
+ pagevec_release(&pvec);
+ cond_resched();
}
}
@@ -728,18 +805,22 @@ xfs_page_state_convert(
int startio,
int unmapped) /* also implies page uptodate */
{
- struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
- xfs_iomap_t *iomp, iomap;
+ struct buffer_head *bh, *head;
+ xfs_iomap_t iomap;
+ xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
unsigned long p_offset = 0;
+ unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index, tlast;
- int len, err, i, cnt = 0, uptodate = 1;
- int flags;
- int page_dirty;
+ ssize_t size, len;
+ int flags, err, iomap_valid = 0, uptodate = 1;
+ int page_dirty, count = 0, trylock_flag = 0;
+ int all_bh = unmapped;
/* wait for other IO threads? */
- flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+ if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
+ trylock_flag |= BMAPI_TRYLOCK;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
@@ -754,161 +835,173 @@ xfs_page_state_convert(
}
}
- end_offset = min_t(unsigned long long,
- (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
- offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-
/*
* page_dirty is initially a count of buffers on the page before
* EOF and is decrememted as we move each into a cleanable state.
- */
+ *
+ * Derivation:
+ *
+ * End offset is the highest offset that this page should represent.
+ * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+ * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+ * hence give us the correct page_dirty count. On any other page,
+ * it will be zero and in that case we need page_dirty to be the
+ * count of buffers on the page.
+ */
+ end_offset = min_t(unsigned long long,
+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
len = 1 << inode->i_blkbits;
- p_offset = max(p_offset, PAGE_CACHE_SIZE);
- p_offset = roundup(p_offset, len);
+ p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+ PAGE_CACHE_SIZE);
+ p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
page_dirty = p_offset / len;
- iomp = NULL;
- p_offset = 0;
bh = head = page_buffers(page);
+ offset = page_offset(page);
+ flags = -1;
+ type = 0;
+
+ /* TODO: cleanup count and page_dirty */
do {
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+ if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+ /*
+ * the iomap is actually still valid, but the ioend
+ * isn't. shouldn't happen too often.
+ */
+ iomap_valid = 0;
continue;
-
- if (iomp) {
- iomp = xfs_offset_to_map(page, &iomap, p_offset);
}
+ if (iomap_valid)
+ iomap_valid = xfs_iomap_valid(&iomap, offset);
+
/*
* First case, map an unwritten extent and prepare for
* extent state conversion transaction on completion.
- */
- if (buffer_unwritten(bh)) {
- if (!startio)
- continue;
- if (!iomp) {
- err = xfs_map_blocks(inode, offset, len, &iomap,
- BMAPI_WRITE|BMAPI_IGNSTATE);
- if (err) {
- goto error;
- }
- iomp = xfs_offset_to_map(page, &iomap,
- p_offset);
+ *
+ * Second case, allocate space for a delalloc buffer.
+ * We can return EAGAIN here in the release page case.
+ *
+ * Third case, an unmapped buffer was found, and we are
+ * in a path where we need to write the whole page out.
+ */
+ if (buffer_unwritten(bh) || buffer_delay(bh) ||
+ ((buffer_uptodate(bh) || PageUptodate(page)) &&
+ !buffer_mapped(bh) && (unmapped || startio))) {
+ /*
+ * Make sure we don't use a read-only iomap
+ */
+ if (flags == BMAPI_READ)
+ iomap_valid = 0;
+
+ if (buffer_unwritten(bh)) {
+ type = IOMAP_UNWRITTEN;
+ flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+ } else if (buffer_delay(bh)) {
+ type = IOMAP_DELAY;
+ flags = BMAPI_ALLOCATE;
+ if (!startio)
+ flags |= trylock_flag;
+ } else {
+ type = IOMAP_NEW;
+ flags = BMAPI_WRITE|BMAPI_MMAP;
}
- if (iomp) {
- if (!bh->b_end_io) {
- err = xfs_map_unwritten(inode, page,
- head, bh, p_offset,
- inode->i_blkbits, iomp,
- wbc, startio, unmapped);
- if (err) {
- goto error;
- }
+
+ if (!iomap_valid) {
+ if (type == IOMAP_NEW) {
+ size = xfs_probe_cluster(inode,
+ page, bh, head, 0);
} else {
- set_bit(BH_Lock, &bh->b_state);
+ size = len;
}
- BUG_ON(!buffer_locked(bh));
- bh_arr[cnt++] = bh;
- page_dirty--;
- }
- /*
- * Second case, allocate space for a delalloc buffer.
- * We can return EAGAIN here in the release page case.
- */
- } else if (buffer_delay(bh)) {
- if (!iomp) {
- err = xfs_map_blocks(inode, offset, len, &iomap,
- BMAPI_ALLOCATE | flags);
- if (err) {
+
+ err = xfs_map_blocks(inode, offset, size,
+ &iomap, flags);
+ if (err)
goto error;
- }
- iomp = xfs_offset_to_map(page, &iomap,
- p_offset);
+ iomap_valid = xfs_iomap_valid(&iomap, offset);
}
- if (iomp) {
- xfs_map_at_offset(page, bh, p_offset,
- inode->i_blkbits, iomp);
+ if (iomap_valid) {
+ xfs_map_at_offset(bh, offset,
+ inode->i_blkbits, &iomap);
if (startio) {
- bh_arr[cnt++] = bh;
+ xfs_add_to_ioend(inode, bh, offset,
+ type, &ioend,
+ !iomap_valid);
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
mark_buffer_dirty(bh);
}
page_dirty--;
+ count++;
+ }
+ } else if (buffer_uptodate(bh) && startio) {
+ /*
+ * we got here because the buffer is already mapped.
+ * That means it must already have extents allocated
+ * underneath it. Map the extent by reading it.
+ */
+ if (!iomap_valid || type != 0) {
+ flags = BMAPI_READ;
+ size = xfs_probe_cluster(inode, page, bh,
+ head, 1);
+ err = xfs_map_blocks(inode, offset, size,
+ &iomap, flags);
+ if (err)
+ goto error;
+ iomap_valid = xfs_iomap_valid(&iomap, offset);
}
- } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
- (unmapped || startio)) {
- if (!buffer_mapped(bh)) {
- int size;
-
- /*
- * Getting here implies an unmapped buffer
- * was found, and we are in a path where we
- * need to write the whole page out.
- */
- if (!iomp) {
- size = xfs_probe_unmapped_cluster(
- inode, page, bh, head);
- err = xfs_map_blocks(inode, offset,
- size, &iomap,
- BMAPI_WRITE|BMAPI_MMAP);
- if (err) {
- goto error;
- }
- iomp = xfs_offset_to_map(page, &iomap,
- p_offset);
- }
- if (iomp) {
- xfs_map_at_offset(page,
- bh, p_offset,
- inode->i_blkbits, iomp);
- if (startio) {
- bh_arr[cnt++] = bh;
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
- }
- } else if (startio) {
- if (buffer_uptodate(bh) &&
- !test_and_set_bit(BH_Lock, &bh->b_state)) {
- bh_arr[cnt++] = bh;
- page_dirty--;
- }
+ type = 0;
+ if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+ ASSERT(buffer_mapped(bh));
+ if (iomap_valid)
+ all_bh = 1;
+ xfs_add_to_ioend(inode, bh, offset, type,
+ &ioend, !iomap_valid);
+ page_dirty--;
+ count++;
+ } else {
+ iomap_valid = 0;
}
+ } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+ (unmapped || startio)) {
+ iomap_valid = 0;
}
- } while (offset += len, p_offset += len,
- ((bh = bh->b_this_page) != head));
+
+ if (!iohead)
+ iohead = ioend;
+
+ } while (offset += len, ((bh = bh->b_this_page) != head));
if (uptodate && bh == head)
SetPageUptodate(page);
- if (startio) {
- xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
- }
+ if (startio)
+ xfs_start_page_writeback(page, wbc, 1, count);
- if (iomp) {
- offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+ if (ioend && iomap_valid) {
+ offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
PAGE_CACHE_SHIFT;
tlast = min_t(pgoff_t, offset, last_index);
- xfs_cluster_write(inode, page->index + 1, iomp, wbc,
- startio, unmapped, tlast);
+ xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
+ wbc, startio, all_bh, tlast);
}
+ if (iohead)
+ xfs_submit_ioend(iohead);
+
return page_dirty;
error:
- for (i = 0; i < cnt; i++) {
- unlock_buffer(bh_arr[i]);
- }
+ if (iohead)
+ xfs_cancel_ioend(iohead);
/*
* If it's delalloc and we have nowhere to put it,
@@ -916,9 +1009,8 @@ error:
* us to try again.
*/
if (err != -EAGAIN) {
- if (!unmapped) {
+ if (!unmapped)
block_invalidatepage(page, 0);
- }
ClearPageUptodate(page);
}
return err;
@@ -982,7 +1074,7 @@ __linvfs_get_block(
}
/* If this is a realtime file, data might be on a new device */
- bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+ bh_result->b_bdev = iomap.iomap_target->bt_bdev;
/* If we previously allocated a block out beyond eof and
* we are now coming back to use it then we will need to
@@ -1094,10 +1186,10 @@ linvfs_direct_IO(
if (error)
return -error;
- iocb->private = xfs_alloc_ioend(inode);
+ iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
- iomap.iomap_target->pbr_bdev,
+ iomap.iomap_target->bt_bdev,
iov, offset, nr_segs,
linvfs_get_blocks_direct,
linvfs_end_io_direct);
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4720758a9ad..55339dd5a30 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
typedef void (*xfs_ioend_func_t)(void *);
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
typedef struct xfs_ioend {
+ struct xfs_ioend *io_list; /* next ioend in chain */
+ unsigned int io_type; /* delalloc / unwritten */
unsigned int io_uptodate; /* I/O status register */
atomic_t io_remaining; /* hold count */
struct vnode *io_vnode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */
+ struct buffer_head *io_buffer_tail;/* buffer linked list tail */
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
} xfs_ioend_t;
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b884..e44b7c1a3a3 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
#include <linux/kthread.h>
#include "xfs_linux.h"
-STATIC kmem_cache_t *pagebuf_zone;
-STATIC kmem_shaker_t pagebuf_shake;
+STATIC kmem_zone_t *xfs_buf_zone;
+STATIC kmem_shaker_t xfs_buf_shake;
+STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
STATIC struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
void
-pagebuf_trace(
- xfs_buf_t *pb,
+xfs_buf_trace(
+ xfs_buf_t *bp,
char *id,
void *data,
void *ra)
{
- ktrace_enter(pagebuf_trace_buf,
- pb, id,
- (void *)(unsigned long)pb->pb_flags,
- (void *)(unsigned long)pb->pb_hold.counter,
- (void *)(unsigned long)pb->pb_sema.count.counter,
+ ktrace_enter(xfs_buf_trace_buf,
+ bp, id,
+ (void *)(unsigned long)bp->b_flags,
+ (void *)(unsigned long)bp->b_hold.counter,
+ (void *)(unsigned long)bp->b_sema.count.counter,
(void *)current,
data, ra,
- (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
- (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
- (void *)(unsigned long)pb->pb_buffer_length,
+ (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
+ (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
+ (void *)(unsigned long)bp->b_buffer_length,
NULL, NULL, NULL, NULL, NULL);
}
-ktrace_t *pagebuf_trace_buf;
-#define PAGEBUF_TRACE_SIZE 4096
-#define PB_TRACE(pb, id, data) \
- pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+ktrace_t *xfs_buf_trace_buf;
+#define XFS_BUF_TRACE_SIZE 4096
+#define XB_TRACE(bp, id, data) \
+ xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
#else
-#define PB_TRACE(pb, id, data) do { } while (0)
+#define XB_TRACE(bp, id, data) do { } while (0)
#endif
-#ifdef PAGEBUF_LOCK_TRACKING
-# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid)
-# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1)
-# define PB_GET_OWNER(pb) ((pb)->pb_last_holder)
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
#else
-# define PB_SET_OWNER(pb) do { } while (0)
-# define PB_CLEAR_OWNER(pb) do { } while (0)
-# define PB_GET_OWNER(pb) do { } while (0)
+# define XB_SET_OWNER(bp) do { } while (0)
+# define XB_CLEAR_OWNER(bp) do { } while (0)
+# define XB_GET_OWNER(bp) do { } while (0)
#endif
-#define pb_to_gfp(flags) \
- ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
- ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+#define xb_to_gfp(flags) \
+ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+ ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-#define pb_to_km(flags) \
- (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+#define xb_to_km(flags) \
+ (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-#define pagebuf_allocate(flags) \
- kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
-#define pagebuf_deallocate(pb) \
- kmem_zone_free(pagebuf_zone, (pb));
+#define xfs_buf_allocate(flags) \
+ kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+ kmem_zone_free(xfs_buf_zone, (bp));
/*
- * Page Region interfaces.
+ * Page Region interfaces.
*
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
+ * For pages in filesystems where the blocksize is smaller than the
+ * pagesize, we use the page->private field (long) to hold a bitmap
+ * of uptodate regions within the page.
*
- * Each such region is "bytes per page / bits per long" bytes long.
+ * Each such region is "bytes per page / bits per long" bytes long.
*
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ * NBPPR == number-of-bytes-per-page-region
+ * BTOPR == bytes-to-page-region (rounded up)
+ * BTOPRT == bytes-to-page-region-truncated (rounded down)
*/
#if (BITS_PER_LONG == 32)
#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
@@ -159,7 +160,7 @@ test_page_region(
}
/*
- * Mapping of multi-page buffers into contiguous virtual space
+ * Mapping of multi-page buffers into contiguous virtual space
*/
typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int as_list_len;
STATIC DEFINE_SPINLOCK(as_lock);
/*
- * Try to batch vunmaps because they are costly.
+ * Try to batch vunmaps because they are costly.
*/
STATIC void
free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
}
/*
- * Internal pagebuf object manipulation
+ * Internal xfs_buf_t object manipulation
*/
STATIC void
-_pagebuf_initialize(
- xfs_buf_t *pb,
+_xfs_buf_initialize(
+ xfs_buf_t *bp,
xfs_buftarg_t *target,
- loff_t range_base,
+ xfs_off_t range_base,
size_t range_length,
- page_buf_flags_t flags)
+ xfs_buf_flags_t flags)
{
/*
- * We don't want certain flags to appear in pb->pb_flags.
+ * We don't want certain flags to appear in b_flags.
*/
- flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
-
- memset(pb, 0, sizeof(xfs_buf_t));
- atomic_set(&pb->pb_hold, 1);
- init_MUTEX_LOCKED(&pb->pb_iodonesema);
- INIT_LIST_HEAD(&pb->pb_list);
- INIT_LIST_HEAD(&pb->pb_hash_list);
- init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
- PB_SET_OWNER(pb);
- pb->pb_target = target;
- pb->pb_file_offset = range_base;
+ flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+
+ memset(bp, 0, sizeof(xfs_buf_t));
+ atomic_set(&bp->b_hold, 1);
+ init_MUTEX_LOCKED(&bp->b_iodonesema);
+ INIT_LIST_HEAD(&bp->b_list);
+ INIT_LIST_HEAD(&bp->b_hash_list);
+ init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+ XB_SET_OWNER(bp);
+ bp->b_target = target;
+ bp->b_file_offset = range_base;
/*
* Set buffer_length and count_desired to the same value initially.
* I/O routines should use count_desired, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
- pb->pb_buffer_length = pb->pb_count_desired = range_length;
- pb->pb_flags = flags;
- pb->pb_bn = XFS_BUF_DADDR_NULL;
- atomic_set(&pb->pb_pin_count, 0);
- init_waitqueue_head(&pb->pb_waiters);
-
- XFS_STATS_INC(pb_create);
- PB_TRACE(pb, "initialize", target);
+ bp->b_buffer_length = bp->b_count_desired = range_length;
+ bp->b_flags = flags;
+ bp->b_bn = XFS_BUF_DADDR_NULL;
+ atomic_set(&bp->b_pin_count, 0);
+ init_waitqueue_head(&bp->b_waiters);
+
+ XFS_STATS_INC(xb_create);
+ XB_TRACE(bp, "initialize", target);
}
/*
- * Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
+ * Allocate a page array capable of holding a specified number
+ * of pages, and point the page buf at it.
*/
STATIC int
-_pagebuf_get_pages(
- xfs_buf_t *pb,
+_xfs_buf_get_pages(
+ xfs_buf_t *bp,
int page_count,
- page_buf_flags_t flags)
+ xfs_buf_flags_t flags)
{
/* Make sure that we have a page list */
- if (pb->pb_pages == NULL) {
- pb->pb_offset = page_buf_poff(pb->pb_file_offset);
- pb->pb_page_count = page_count;
- if (page_count <= PB_PAGES) {
- pb->pb_pages = pb->pb_page_array;
+ if (bp->b_pages == NULL) {
+ bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+ bp->b_page_count = page_count;
+ if (page_count <= XB_PAGES) {
+ bp->b_pages = bp->b_page_array;
} else {
- pb->pb_pages = kmem_alloc(sizeof(struct page *) *
- page_count, pb_to_km(flags));
- if (pb->pb_pages == NULL)
+ bp->b_pages = kmem_alloc(sizeof(struct page *) *
+ page_count, xb_to_km(flags));
+ if (bp->b_pages == NULL)
return -ENOMEM;
}
- memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+ memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
}
return 0;
}
/*
- * Frees pb_pages if it was malloced.
+ * Frees b_pages if it was allocated.
*/
STATIC void
-_pagebuf_free_pages(
+_xfs_buf_free_pages(
xfs_buf_t *bp)
{
- if (bp->pb_pages != bp->pb_page_array) {
- kmem_free(bp->pb_pages,
- bp->pb_page_count * sizeof(struct page *));
+ if (bp->b_pages != bp->b_page_array) {
+ kmem_free(bp->b_pages,
+ bp->b_page_count * sizeof(struct page *));
}
}
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
* Releases the specified buffer.
*
* The modification state of any associated pages is left unchanged.
- * The buffer most not be on any hash - use pagebuf_rele instead for
+ * The buffer must not be on any hash - use xfs_buf_rele instead for
* hashed and refcounted buffers
*/
void
-pagebuf_free(
+xfs_buf_free(
xfs_buf_t *bp)
{
- PB_TRACE(bp, "free", 0);
+ XB_TRACE(bp, "free", 0);
- ASSERT(list_empty(&bp->pb_hash_list));
+ ASSERT(list_empty(&bp->b_hash_list));
- if (bp->pb_flags & _PBF_PAGE_CACHE) {
+ if (bp->b_flags & _XBF_PAGE_CACHE) {
uint i;
- if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
- free_address(bp->pb_addr - bp->pb_offset);
+ if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
+ free_address(bp->b_addr - bp->b_offset);
- for (i = 0; i < bp->pb_page_count; i++)
- page_cache_release(bp->pb_pages[i]);
- _pagebuf_free_pages(bp);
- } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+ for (i = 0; i < bp->b_page_count; i++)
+ page_cache_release(bp->b_pages[i]);
+ _xfs_buf_free_pages(bp);
+ } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
/*
- * XXX(hch): bp->pb_count_desired might be incorrect (see
- * pagebuf_associate_memory for details), but fortunately
+ * XXX(hch): bp->b_count_desired might be incorrect (see
+ * xfs_buf_associate_memory for details), but fortunately
* the Linux version of kmem_free ignores the len argument..
*/
- kmem_free(bp->pb_addr, bp->pb_count_desired);
- _pagebuf_free_pages(bp);
+ kmem_free(bp->b_addr, bp->b_count_desired);
+ _xfs_buf_free_pages(bp);
}
- pagebuf_deallocate(bp);
+ xfs_buf_deallocate(bp);
}
/*
* Finds all pages for buffer in question and builds it's page list.
*/
STATIC int
-_pagebuf_lookup_pages(
+_xfs_buf_lookup_pages(
xfs_buf_t *bp,
uint flags)
{
- struct address_space *mapping = bp->pb_target->pbr_mapping;
- size_t blocksize = bp->pb_target->pbr_bsize;
- size_t size = bp->pb_count_desired;
+ struct address_space *mapping = bp->b_target->bt_mapping;
+ size_t blocksize = bp->b_target->bt_bsize;
+ size_t size = bp->b_count_desired;
size_t nbytes, offset;
- gfp_t gfp_mask = pb_to_gfp(flags);
+ gfp_t gfp_mask = xb_to_gfp(flags);
unsigned short page_count, i;
pgoff_t first;
- loff_t end;
+ xfs_off_t end;
int error;
- end = bp->pb_file_offset + bp->pb_buffer_length;
- page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+ end = bp->b_file_offset + bp->b_buffer_length;
+ page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
- error = _pagebuf_get_pages(bp, page_count, flags);
+ error = _xfs_buf_get_pages(bp, page_count, flags);
if (unlikely(error))
return error;
- bp->pb_flags |= _PBF_PAGE_CACHE;
+ bp->b_flags |= _XBF_PAGE_CACHE;
- offset = bp->pb_offset;
- first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+ offset = bp->b_offset;
+ first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
- for (i = 0; i < bp->pb_page_count; i++) {
+ for (i = 0; i < bp->b_page_count; i++) {
struct page *page;
uint retries = 0;
retry:
page = find_or_create_page(mapping, first + i, gfp_mask);
if (unlikely(page == NULL)) {
- if (flags & PBF_READ_AHEAD) {
- bp->pb_page_count = i;
- for (i = 0; i < bp->pb_page_count; i++)
- unlock_page(bp->pb_pages[i]);
+ if (flags & XBF_READ_AHEAD) {
+ bp->b_page_count = i;
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
return -ENOMEM;
}
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
"deadlock in %s (mode:0x%x)\n",
__FUNCTION__, gfp_mask);
- XFS_STATS_INC(pb_page_retries);
+ XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask);
blk_congestion_wait(WRITE, HZ/50);
goto retry;
}
- XFS_STATS_INC(pb_page_found);
+ XFS_STATS_INC(xb_page_found);
nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
if (!PageUptodate(page)) {
page_count--;
if (blocksize >= PAGE_CACHE_SIZE) {
- if (flags & PBF_READ)
- bp->pb_locked = 1;
+ if (flags & XBF_READ)
+ bp->b_locked = 1;
} else if (!PagePrivate(page)) {
if (test_page_region(page, offset, nbytes))
page_count++;
}
}
- bp->pb_pages[i] = page;
+ bp->b_pages[i] = page;
offset = 0;
}
- if (!bp->pb_locked) {
- for (i = 0; i < bp->pb_page_count; i++)
- unlock_page(bp->pb_pages[i]);
+ if (!bp->b_locked) {
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
}
- if (page_count == bp->pb_page_count)
- bp->pb_flags |= PBF_DONE;
+ if (page_count == bp->b_page_count)
+ bp->b_flags |= XBF_DONE;
- PB_TRACE(bp, "lookup_pages", (long)page_count);
+ XB_TRACE(bp, "lookup_pages", (long)page_count);
return error;
}
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
* Map buffer into kernel address-space if nessecary.
*/
STATIC int
-_pagebuf_map_pages(
+_xfs_buf_map_pages(
xfs_buf_t *bp,
uint flags)
{
/* A single page buffer is always mappable */
- if (bp->pb_page_count == 1) {
- bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
- bp->pb_flags |= PBF_MAPPED;
- } else if (flags & PBF_MAPPED) {
+ if (bp->b_page_count == 1) {
+ bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+ bp->b_flags |= XBF_MAPPED;
+ } else if (flags & XBF_MAPPED) {
if (as_list_len > 64)
purge_addresses();
- bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
- VM_MAP, PAGE_KERNEL);
- if (unlikely(bp->pb_addr == NULL))
+ bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
+ VM_MAP, PAGE_KERNEL);
+ if (unlikely(bp->b_addr == NULL))
return -ENOMEM;
- bp->pb_addr += bp->pb_offset;
- bp->pb_flags |= PBF_MAPPED;
+ bp->b_addr += bp->b_offset;
+ bp->b_flags |= XBF_MAPPED;
}
return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
*/
/*
- * _pagebuf_find
- *
- * Looks up, and creates if absent, a lockable buffer for
+ * Looks up, and creates if absent, a lockable buffer for
* a given range of an inode. The buffer is returned
* locked. If other overlapping buffers exist, they are
* released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
* are unlocked. No I/O is implied by this call.
*/
xfs_buf_t *
-_pagebuf_find(
+_xfs_buf_find(
xfs_buftarg_t *btp, /* block device target */
- loff_t ioff, /* starting offset of range */
+ xfs_off_t ioff, /* starting offset of range */
size_t isize, /* length of range */
- page_buf_flags_t flags, /* PBF_TRYLOCK */
- xfs_buf_t *new_pb)/* newly allocated buffer */
+ xfs_buf_flags_t flags,
+ xfs_buf_t *new_bp)
{
- loff_t range_base;
+ xfs_off_t range_base;
size_t range_length;
xfs_bufhash_t *hash;
- xfs_buf_t *pb, *n;
+ xfs_buf_t *bp, *n;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
/* Check for IOs smaller than the sector size / not sector aligned */
- ASSERT(!(range_length < (1 << btp->pbr_sshift)));
- ASSERT(!(range_base & (loff_t)btp->pbr_smask));
+ ASSERT(!(range_length < (1 << btp->bt_sshift)));
+ ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
- ASSERT(btp == pb->pb_target);
- if (pb->pb_file_offset == range_base &&
- pb->pb_buffer_length == range_length) {
+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
+ ASSERT(btp == bp->b_target);
+ if (bp->b_file_offset == range_base &&
+ bp->b_buffer_length == range_length) {
/*
- * If we look at something bring it to the
+ * If we look at something, bring it to the
* front of the list for next time.
*/
- atomic_inc(&pb->pb_hold);
- list_move(&pb->pb_hash_list, &hash->bh_list);
+ atomic_inc(&bp->b_hold);
+ list_move(&bp->b_hash_list, &hash->bh_list);
goto found;
}
}
/* No match found */
- if (new_pb) {
- _pagebuf_initialize(new_pb, btp, range_base,
+ if (new_bp) {
+ _xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
- new_pb->pb_hash = hash;
- list_add(&new_pb->pb_hash_list, &hash->bh_list);
+ new_bp->b_hash = hash;
+ list_add(&new_bp->b_hash_list, &hash->bh_list);
} else {
- XFS_STATS_INC(pb_miss_locked);
+ XFS_STATS_INC(xb_miss_locked);
}
spin_unlock(&hash->bh_lock);
- return new_pb;
+ return new_bp;
found:
spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
* if this does not work then we need to drop the
* spinlock and do a hard attempt on the semaphore.
*/
- if (down_trylock(&pb->pb_sema)) {
- if (!(flags & PBF_TRYLOCK)) {
+ if (down_trylock(&bp->b_sema)) {
+ if (!(flags & XBF_TRYLOCK)) {
/* wait for buffer ownership */
- PB_TRACE(pb, "get_lock", 0);
- pagebuf_lock(pb);
- XFS_STATS_INC(pb_get_locked_waited);
+ XB_TRACE(bp, "get_lock", 0);
+ xfs_buf_lock(bp);
+ XFS_STATS_INC(xb_get_locked_waited);
} else {
/* We asked for a trylock and failed, no need
* to look at file offset and length here, we
- * know that this pagebuf at least overlaps our
- * pagebuf and is locked, therefore our buffer
- * either does not exist, or is this buffer
+ * know that this buffer at least overlaps our
+ * buffer and is locked, therefore our buffer
+ * either does not exist, or is this buffer.
*/
-
- pagebuf_rele(pb);
- XFS_STATS_INC(pb_busy_locked);
- return (NULL);
+ xfs_buf_rele(bp);
+ XFS_STATS_INC(xb_busy_locked);
+ return NULL;
}
} else {
/* trylock worked */
- PB_SET_OWNER(pb);
+ XB_SET_OWNER(bp);
}
- if (pb->pb_flags & PBF_STALE) {
- ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
- pb->pb_flags &= PBF_MAPPED;
+ if (bp->b_flags & XBF_STALE) {
+ ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+ bp->b_flags &= XBF_MAPPED;
}
- PB_TRACE(pb, "got_lock", 0);
- XFS_STATS_INC(pb_get_locked);
- return (pb);
+ XB_TRACE(bp, "got_lock", 0);
+ XFS_STATS_INC(xb_get_locked);
+ return bp;
}
/*
- * xfs_buf_get_flags assembles a buffer covering the specified range.
- *
+ * Assembles a buffer covering the specified range.
* Storage in memory for all portions of the buffer will be allocated,
* although backing storage may not be.
*/
xfs_buf_t *
-xfs_buf_get_flags( /* allocate a buffer */
+xfs_buf_get_flags(
xfs_buftarg_t *target,/* target for buffer */
- loff_t ioff, /* starting offset of range */
+ xfs_off_t ioff, /* starting offset of range */
size_t isize, /* length of range */
- page_buf_flags_t flags) /* PBF_TRYLOCK */
+ xfs_buf_flags_t flags)
{
- xfs_buf_t *pb, *new_pb;
+ xfs_buf_t *bp, *new_bp;
int error = 0, i;
- new_pb = pagebuf_allocate(flags);
- if (unlikely(!new_pb))
+ new_bp = xfs_buf_allocate(flags);
+ if (unlikely(!new_bp))
return NULL;
- pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
- if (pb == new_pb) {
- error = _pagebuf_lookup_pages(pb, flags);
+ bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+ if (bp == new_bp) {
+ error = _xfs_buf_lookup_pages(bp, flags);
if (error)
goto no_buffer;
} else {
- pagebuf_deallocate(new_pb);
- if (unlikely(pb == NULL))
+ xfs_buf_deallocate(new_bp);
+ if (unlikely(bp == NULL))
return NULL;
}
- for (i = 0; i < pb->pb_page_count; i++)
- mark_page_accessed(pb->pb_pages[i]);
+ for (i = 0; i < bp->b_page_count; i++)
+ mark_page_accessed(bp->b_pages[i]);
- if (!(pb->pb_flags & PBF_MAPPED)) {
- error = _pagebuf_map_pages(pb, flags);
+ if (!(bp->b_flags & XBF_MAPPED)) {
+ error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
printk(KERN_WARNING "%s: failed to map pages\n",
__FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags( /* allocate a buffer */
}
}
- XFS_STATS_INC(pb_get);
+ XFS_STATS_INC(xb_get);
/*
* Always fill in the block number now, the mapped cases can do
* their own overlay of this later.
*/
- pb->pb_bn = ioff;
- pb->pb_count_desired = pb->pb_buffer_length;
+ bp->b_bn = ioff;
+ bp->b_count_desired = bp->b_buffer_length;
- PB_TRACE(pb, "get", (unsigned long)flags);
- return pb;
+ XB_TRACE(bp, "get", (unsigned long)flags);
+ return bp;
no_buffer:
- if (flags & (PBF_LOCK | PBF_TRYLOCK))
- pagebuf_unlock(pb);
- pagebuf_rele(pb);
+ if (flags & (XBF_LOCK | XBF_TRYLOCK))
+ xfs_buf_unlock(bp);
+ xfs_buf_rele(bp);
return NULL;
}
xfs_buf_t *
xfs_buf_read_flags(
xfs_buftarg_t *target,
- loff_t ioff,
+ xfs_off_t ioff,
size_t isize,
- page_buf_flags_t flags)
+ xfs_buf_flags_t flags)
{
- xfs_buf_t *pb;
-
- flags |= PBF_READ;
-
- pb = xfs_buf_get_flags(target, ioff, isize, flags);
- if (pb) {
- if (!XFS_BUF_ISDONE(pb)) {
- PB_TRACE(pb, "read", (unsigned long)flags);
- XFS_STATS_INC(pb_get_read);
- pagebuf_iostart(pb, flags);
- } else if (flags & PBF_ASYNC) {
- PB_TRACE(pb, "read_async", (unsigned long)flags);
+ xfs_buf_t *bp;
+
+ flags |= XBF_READ;
+
+ bp = xfs_buf_get_flags(target, ioff, isize, flags);
+ if (bp) {
+ if (!XFS_BUF_ISDONE(bp)) {
+ XB_TRACE(bp, "read", (unsigned long)flags);
+ XFS_STATS_INC(xb_get_read);
+ xfs_buf_iostart(bp, flags);
+ } else if (flags & XBF_ASYNC) {
+ XB_TRACE(bp, "read_async", (unsigned long)flags);
/*
* Read ahead call which is already satisfied,
* drop the buffer
*/
goto no_buffer;
} else {
- PB_TRACE(pb, "read_done", (unsigned long)flags);
+ XB_TRACE(bp, "read_done", (unsigned long)flags);
/* We do not want read in the flags */
- pb->pb_flags &= ~PBF_READ;
+ bp->b_flags &= ~XBF_READ;
}
}
- return pb;
+ return bp;
no_buffer:
- if (flags & (PBF_LOCK | PBF_TRYLOCK))
- pagebuf_unlock(pb);
- pagebuf_rele(pb);
+ if (flags & (XBF_LOCK | XBF_TRYLOCK))
+ xfs_buf_unlock(bp);
+ xfs_buf_rele(bp);
return NULL;
}
/*
- * If we are not low on memory then do the readahead in a deadlock
- * safe manner.
+ * If we are not low on memory then do the readahead in a deadlock
+ * safe manner.
*/
void
-pagebuf_readahead(
+xfs_buf_readahead(
xfs_buftarg_t *target,
- loff_t ioff,
+ xfs_off_t ioff,
size_t isize,
- page_buf_flags_t flags)
+ xfs_buf_flags_t flags)
{
struct backing_dev_info *bdi;
- bdi = target->pbr_mapping->backing_dev_info;
+ bdi = target->bt_mapping->backing_dev_info;
if (bdi_read_congested(bdi))
return;
- flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
+ flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
xfs_buf_read_flags(target, ioff, isize, flags);
}
xfs_buf_t *
-pagebuf_get_empty(
+xfs_buf_get_empty(
size_t len,
xfs_buftarg_t *target)
{
- xfs_buf_t *pb;
+ xfs_buf_t *bp;
- pb = pagebuf_allocate(0);
- if (pb)
- _pagebuf_initialize(pb, target, 0, len, 0);
- return pb;
+ bp = xfs_buf_allocate(0);
+ if (bp)
+ _xfs_buf_initialize(bp, target, 0, len, 0);
+ return bp;
}
static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
}
int
-pagebuf_associate_memory(
- xfs_buf_t *pb,
+xfs_buf_associate_memory(
+ xfs_buf_t *bp,
void *mem,
size_t len)
{
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
page_count++;
/* Free any previous set of page pointers */
- if (pb->pb_pages)
- _pagebuf_free_pages(pb);
+ if (bp->b_pages)
+ _xfs_buf_free_pages(bp);
- pb->pb_pages = NULL;
- pb->pb_addr = mem;
+ bp->b_pages = NULL;
+ bp->b_addr = mem;
- rval = _pagebuf_get_pages(pb, page_count, 0);
+ rval = _xfs_buf_get_pages(bp, page_count, 0);
if (rval)
return rval;
- pb->pb_offset = offset;
+ bp->b_offset = offset;
ptr = (size_t) mem & PAGE_CACHE_MASK;
end = PAGE_CACHE_ALIGN((size_t) mem + len);
end_cur = end;
/* set up first page */
- pb->pb_pages[0] = mem_to_page(mem);
+ bp->b_pages[0] = mem_to_page(mem);
ptr += PAGE_CACHE_SIZE;
- pb->pb_page_count = ++i;
+ bp->b_page_count = ++i;
while (ptr < end) {
- pb->pb_pages[i] = mem_to_page((void *)ptr);
- pb->pb_page_count = ++i;
+ bp->b_pages[i] = mem_to_page((void *)ptr);
+ bp->b_page_count = ++i;
ptr += PAGE_CACHE_SIZE;
}
- pb->pb_locked = 0;
+ bp->b_locked = 0;
- pb->pb_count_desired = pb->pb_buffer_length = len;
- pb->pb_flags |= PBF_MAPPED;
+ bp->b_count_desired = bp->b_buffer_length = len;
+ bp->b_flags |= XBF_MAPPED;
return 0;
}
xfs_buf_t *
-pagebuf_get_no_daddr(
+xfs_buf_get_noaddr(
size_t len,
xfs_buftarg_t *target)
{
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
void *data;
int error;
- bp = pagebuf_allocate(0);
+ bp = xfs_buf_allocate(0);
if (unlikely(bp == NULL))
goto fail;
- _pagebuf_initialize(bp, target, 0, len, 0);
+ _xfs_buf_initialize(bp, target, 0, len, 0);
try_again:
data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
/* check whether alignment matches.. */
if ((__psunsigned_t)data !=
- ((__psunsigned_t)data & ~target->pbr_smask)) {
+ ((__psunsigned_t)data & ~target->bt_smask)) {
/* .. else double the size and try again */
kmem_free(data, malloc_len);
malloc_len <<= 1;
goto try_again;
}
- error = pagebuf_associate_memory(bp, data, len);
+ error = xfs_buf_associate_memory(bp, data, len);
if (error)
goto fail_free_mem;
- bp->pb_flags |= _PBF_KMEM_ALLOC;
+ bp->b_flags |= _XBF_KMEM_ALLOC;
- pagebuf_unlock(bp);
+ xfs_buf_unlock(bp);
- PB_TRACE(bp, "no_daddr", data);
+ XB_TRACE(bp, "no_daddr", data);
return bp;
fail_free_mem:
kmem_free(data, malloc_len);
fail_free_buf:
- pagebuf_free(bp);
+ xfs_buf_free(bp);
fail:
return NULL;
}
/*
- * pagebuf_hold
- *
* Increment reference count on buffer, to hold the buffer concurrently
* with another thread which may release (free) the buffer asynchronously.
- *
* Must hold the buffer already to call this function.
*/
void
-pagebuf_hold(
- xfs_buf_t *pb)
+xfs_buf_hold(
+ xfs_buf_t *bp)
{
- atomic_inc(&pb->pb_hold);
- PB_TRACE(pb, "hold", 0);
+ atomic_inc(&bp->b_hold);
+ XB_TRACE(bp, "hold", 0);
}
/*
- * pagebuf_rele
- *
- * pagebuf_rele releases a hold on the specified buffer. If the
- * the hold count is 1, pagebuf_rele calls pagebuf_free.
+ * Releases a hold on the specified buffer. If the
+ * hold count is 1, calls xfs_buf_free.
*/
void
-pagebuf_rele(
- xfs_buf_t *pb)
+xfs_buf_rele(
+ xfs_buf_t *bp)
{
- xfs_bufhash_t *hash = pb->pb_hash;
+ xfs_bufhash_t *hash = bp->b_hash;
- PB_TRACE(pb, "rele", pb->pb_relse);
+ XB_TRACE(bp, "rele", bp->b_relse);
- if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
- if (pb->pb_relse) {
- atomic_inc(&pb->pb_hold);
+ if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+ if (bp->b_relse) {
+ atomic_inc(&bp->b_hold);
spin_unlock(&hash->bh_lock);
- (*(pb->pb_relse)) (pb);
- } else if (pb->pb_flags & PBF_FS_MANAGED) {
+ (*(bp->b_relse)) (bp);
+ } else if (bp->b_flags & XBF_FS_MANAGED) {
spin_unlock(&hash->bh_lock);
} else {
- ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)));
- list_del_init(&pb->pb_hash_list);
+ ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+ list_del_init(&bp->b_hash_list);
spin_unlock(&hash->bh_lock);
- pagebuf_free(pb);
+ xfs_buf_free(bp);
}
} else {
/*
* Catch reference count leaks
*/
- ASSERT(atomic_read(&pb->pb_hold) >= 0);
+ ASSERT(atomic_read(&bp->b_hold) >= 0);
}
}
@@ -863,168 +855,122 @@ pagebuf_rele(
*/
/*
- * pagebuf_cond_lock
- *
- * pagebuf_cond_lock locks a buffer object, if it is not already locked.
- * Note that this in no way
- * locks the underlying pages, so it is only useful for synchronizing
- * concurrent use of page buffer objects, not for synchronizing independent
- * access to the underlying pages.
+ * Locks a buffer object, if it is not already locked.
+ * Note that this in no way locks the underlying pages, so it is only
+ * useful for synchronizing concurrent use of buffer objects, not for
+ * synchronizing independent access to the underlying pages.
*/
int
-pagebuf_cond_lock( /* lock buffer, if not locked */
- /* returns -EBUSY if locked) */
- xfs_buf_t *pb)
+xfs_buf_cond_lock(
+ xfs_buf_t *bp)
{
int locked;
- locked = down_trylock(&pb->pb_sema) == 0;
+ locked = down_trylock(&bp->b_sema) == 0;
if (locked) {
- PB_SET_OWNER(pb);
+ XB_SET_OWNER(bp);
}
- PB_TRACE(pb, "cond_lock", (long)locked);
- return(locked ? 0 : -EBUSY);
+ XB_TRACE(bp, "cond_lock", (long)locked);
+ return locked ? 0 : -EBUSY;
}
#if defined(DEBUG) || defined(XFS_BLI_TRACE)
-/*
- * pagebuf_lock_value
- *
- * Return lock value for a pagebuf
- */
int
-pagebuf_lock_value(
- xfs_buf_t *pb)
+xfs_buf_lock_value(
+ xfs_buf_t *bp)
{
- return(atomic_read(&pb->pb_sema.count));
+ return atomic_read(&bp->b_sema.count);
}
#endif
/*
- * pagebuf_lock
- *
- * pagebuf_lock locks a buffer object. Note that this in no way
- * locks the underlying pages, so it is only useful for synchronizing
- * concurrent use of page buffer objects, not for synchronizing independent
- * access to the underlying pages.
+ * Locks a buffer object.
+ * Note that this in no way locks the underlying pages, so it is only
+ * useful for synchronizing concurrent use of buffer objects, not for
+ * synchronizing independent access to the underlying pages.
*/
-int
-pagebuf_lock(
- xfs_buf_t *pb)
+void
+xfs_buf_lock(
+ xfs_buf_t *bp)
{
- PB_TRACE(pb, "lock", 0);
- if (atomic_read(&pb->pb_io_remaining))
- blk_run_address_space(pb->pb_target->pbr_mapping);
- down(&pb->pb_sema);
- PB_SET_OWNER(pb);
- PB_TRACE(pb, "locked", 0);
- return 0;
+ XB_TRACE(bp, "lock", 0);
+ if (atomic_read(&bp->b_io_remaining))
+ blk_run_address_space(bp->b_target->bt_mapping);
+ down(&bp->b_sema);
+ XB_SET_OWNER(bp);
+ XB_TRACE(bp, "locked", 0);
}
/*
- * pagebuf_unlock
- *
- * pagebuf_unlock releases the lock on the buffer object created by
- * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
- * created by pagebuf_pin).
- *
+ * Releases the lock on the buffer object.
* If the buffer is marked delwri but is not queued, do so before we
- * unlock the buffer as we need to set flags correctly. We also need to
+ * unlock the buffer as we need to set flags correctly. We also need to
* take a reference for the delwri queue because the unlocker is going to
* drop theirs and they don't know we just queued it.
*/
void
-pagebuf_unlock( /* unlock buffer */
- xfs_buf_t *pb) /* buffer to unlock */
+xfs_buf_unlock(
+ xfs_buf_t *bp)
{
- if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
- atomic_inc(&pb->pb_hold);
- pb->pb_flags |= PBF_ASYNC;
- pagebuf_delwri_queue(pb, 0);
+ if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
+ atomic_inc(&bp->b_hold);
+ bp->b_flags |= XBF_ASYNC;
+ xfs_buf_delwri_queue(bp, 0);
}
- PB_CLEAR_OWNER(pb);
- up(&pb->pb_sema);
- PB_TRACE(pb, "unlock", 0);
+ XB_CLEAR_OWNER(bp);
+ up(&bp->b_sema);
+ XB_TRACE(bp, "unlock", 0);
}
/*
* Pinning Buffer Storage in Memory
- */
-
-/*
- * pagebuf_pin
- *
- * pagebuf_pin locks all of the memory represented by a buffer in
- * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
- * the same or different buffers affecting a given page, will
- * properly count the number of outstanding "pin" requests. The
- * buffer may be released after the pagebuf_pin and a different
- * buffer used when calling pagebuf_unpin, if desired.
- * pagebuf_pin should be used by the file system when it wants be
- * assured that no attempt will be made to force the affected
- * memory to disk. It does not assure that a given logical page
- * will not be moved to a different physical page.
+ * Ensure that no attempt to force a buffer to disk will succeed.
*/
void
-pagebuf_pin(
- xfs_buf_t *pb)
+xfs_buf_pin(
+ xfs_buf_t *bp)
{
- atomic_inc(&pb->pb_pin_count);
- PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+ atomic_inc(&bp->b_pin_count);
+ XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
}
-/*
- * pagebuf_unpin
- *
- * pagebuf_unpin reverses the locking of memory performed by
- * pagebuf_pin. Note that both functions affected the logical
- * pages associated with the buffer, not the buffer itself.
- */
void
-pagebuf_unpin(
- xfs_buf_t *pb)
+xfs_buf_unpin(
+ xfs_buf_t *bp)
{
- if (atomic_dec_and_test(&pb->pb_pin_count)) {
- wake_up_all(&pb->pb_waiters);
- }
- PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
+ if (atomic_dec_and_test(&bp->b_pin_count))
+ wake_up_all(&bp->b_waiters);
+ XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
}
int
-pagebuf_ispin(
- xfs_buf_t *pb)
+xfs_buf_ispin(
+ xfs_buf_t *bp)
{
- return atomic_read(&pb->pb_pin_count);
+ return atomic_read(&bp->b_pin_count);
}
-/*
- * pagebuf_wait_unpin
- *
- * pagebuf_wait_unpin waits until all of the memory associated
- * with the buffer is not longer locked in memory. It returns
- * immediately if none of the affected pages are locked.
- */
-static inline void
-_pagebuf_wait_unpin(
- xfs_buf_t *pb)
+STATIC void
+xfs_buf_wait_unpin(
+ xfs_buf_t *bp)
{
DECLARE_WAITQUEUE (wait, current);
- if (atomic_read(&pb->pb_pin_count) == 0)
+ if (atomic_read(&bp->b_pin_count) == 0)
return;
- add_wait_queue(&pb->pb_waiters, &wait);
+ add_wait_queue(&bp->b_waiters, &wait);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&pb->pb_pin_count) == 0)
+ if (atomic_read(&bp->b_pin_count) == 0)
break;
- if (atomic_read(&pb->pb_io_remaining))
- blk_run_address_space(pb->pb_target->pbr_mapping);
+ if (atomic_read(&bp->b_io_remaining))
+ blk_run_address_space(bp->b_target->bt_mapping);
schedule();
}
- remove_wait_queue(&pb->pb_waiters, &wait);
+ remove_wait_queue(&bp->b_waiters, &wait);
set_current_state(TASK_RUNNING);
}
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
* Buffer Utility Routines
*/
-/*
- * pagebuf_iodone
- *
- * pagebuf_iodone marks a buffer for which I/O is in progress
- * done with respect to that I/O. The pb_iodone routine, if
- * present, will be called as a side-effect.
- */
STATIC void
-pagebuf_iodone_work(
+xfs_buf_iodone_work(
void *v)
{
xfs_buf_t *bp = (xfs_buf_t *)v;
- if (bp->pb_iodone)
- (*(bp->pb_iodone))(bp);
- else if (bp->pb_flags & PBF_ASYNC)
+ if (bp->b_iodone)
+ (*(bp->b_iodone))(bp);
+ else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
}
void
-pagebuf_iodone(
- xfs_buf_t *pb,
+xfs_buf_ioend(
+ xfs_buf_t *bp,
int schedule)
{
- pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
- if (pb->pb_error == 0)
- pb->pb_flags |= PBF_DONE;
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE);
+ if (bp->b_error == 0)
+ bp->b_flags |= XBF_DONE;
- PB_TRACE(pb, "iodone", pb->pb_iodone);
+ XB_TRACE(bp, "iodone", bp->b_iodone);
- if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+ if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
if (schedule) {
- INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
- queue_work(xfslogd_workqueue, &pb->pb_iodone_work);
+ INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
+ queue_work(xfslogd_workqueue, &bp->b_iodone_work);
} else {
- pagebuf_iodone_work(pb);
+ xfs_buf_iodone_work(bp);
}
} else {
- up(&pb->pb_iodonesema);
+ up(&bp->b_iodonesema);
}
}
-/*
- * pagebuf_ioerror
- *
- * pagebuf_ioerror sets the error code for a buffer.
- */
void
-pagebuf_ioerror( /* mark/clear buffer error flag */
- xfs_buf_t *pb, /* buffer to mark */
- int error) /* error to store (0 if none) */
+xfs_buf_ioerror(
+ xfs_buf_t *bp,
+ int error)
{
ASSERT(error >= 0 && error <= 0xffff);
- pb->pb_error = (unsigned short)error;
- PB_TRACE(pb, "ioerror", (unsigned long)error);
+ bp->b_error = (unsigned short)error;
+ XB_TRACE(bp, "ioerror", (unsigned long)error);
}
/*
- * pagebuf_iostart
- *
- * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
- * If necessary, it will arrange for any disk space allocation required,
- * and it will break up the request if the block mappings require it.
- * The pb_iodone routine in the buffer supplied will only be called
+ * Initiate I/O on a buffer, based on the flags supplied.
+ * The b_iodone routine in the buffer supplied will only be called
* when all of the subsidiary I/O requests, if any, have been completed.
- * pagebuf_iostart calls the pagebuf_ioinitiate routine or
- * pagebuf_iorequest, if the former routine is not defined, to start
- * the I/O on a given low-level request.
*/
int
-pagebuf_iostart( /* start I/O on a buffer */
- xfs_buf_t *pb, /* buffer to start */
- page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */
- /* PBF_WRITE, PBF_DELWRI, */
- /* PBF_DONT_BLOCK */
+xfs_buf_iostart(
+ xfs_buf_t *bp,
+ xfs_buf_flags_t flags)
{
int status = 0;
- PB_TRACE(pb, "iostart", (unsigned long)flags);
+ XB_TRACE(bp, "iostart", (unsigned long)flags);
- if (flags & PBF_DELWRI) {
- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
- pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
- pagebuf_delwri_queue(pb, 1);
+ if (flags & XBF_DELWRI) {
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
+ bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
+ xfs_buf_delwri_queue(bp, 1);
return status;
}
- pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
- PBF_READ_AHEAD | _PBF_RUN_QUEUES);
- pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
- PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);
+ BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
/* For writes allow an alternate strategy routine to precede
* the actual I/O request (which may not be issued at all in
* a shutdown situation, for example).
*/
- status = (flags & PBF_WRITE) ?
- pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
+ status = (flags & XBF_WRITE) ?
+ xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
/* Wait for I/O if we are not an async request.
* Note: async I/O request completion will release the buffer,
* and that can already be done by this point. So using the
* buffer pointer from here on, after async I/O, is invalid.
*/
- if (!status && !(flags & PBF_ASYNC))
- status = pagebuf_iowait(pb);
+ if (!status && !(flags & XBF_ASYNC))
+ status = xfs_buf_iowait(bp);
return status;
}
-/*
- * Helper routine for pagebuf_iorequest
- */
-
STATIC __inline__ int
-_pagebuf_iolocked(
- xfs_buf_t *pb)
+_xfs_buf_iolocked(
+ xfs_buf_t *bp)
{
- ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
- if (pb->pb_flags & PBF_READ)
- return pb->pb_locked;
+ ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
+ if (bp->b_flags & XBF_READ)
+ return bp->b_locked;
return 0;
}
STATIC __inline__ void
-_pagebuf_iodone(
- xfs_buf_t *pb,
+_xfs_buf_ioend(
+ xfs_buf_t *bp,
int schedule)
{
- if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
- pb->pb_locked = 0;
- pagebuf_iodone(pb, schedule);
+ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+ bp->b_locked = 0;
+ xfs_buf_ioend(bp, schedule);
}
}
STATIC int
-bio_end_io_pagebuf(
+xfs_buf_bio_end_io(
struct bio *bio,
unsigned int bytes_done,
int error)
{
- xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;
- unsigned int blocksize = pb->pb_target->pbr_bsize;
+ xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
+ unsigned int blocksize = bp->b_target->bt_bsize;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
if (bio->bi_size)
return 1;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- pb->pb_error = EIO;
+ bp->b_error = EIO;
do {
struct page *page = bvec->bv_page;
- if (unlikely(pb->pb_error)) {
- if (pb->pb_flags & PBF_READ)
+ if (unlikely(bp->b_error)) {
+ if (bp->b_flags & XBF_READ)
ClearPageUptodate(page);
SetPageError(page);
- } else if (blocksize == PAGE_CACHE_SIZE) {
+ } else if (blocksize >= PAGE_CACHE_SIZE) {
SetPageUptodate(page);
} else if (!PagePrivate(page) &&
- (pb->pb_flags & _PBF_PAGE_CACHE)) {
+ (bp->b_flags & _XBF_PAGE_CACHE)) {
set_page_region(page, bvec->bv_offset, bvec->bv_len);
}
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
- if (_pagebuf_iolocked(pb)) {
+ if (_xfs_buf_iolocked(bp)) {
unlock_page(page);
}
} while (bvec >= bio->bi_io_vec);
- _pagebuf_iodone(pb, 1);
+ _xfs_buf_ioend(bp, 1);
bio_put(bio);
return 0;
}
STATIC void
-_pagebuf_ioapply(
- xfs_buf_t *pb)
+_xfs_buf_ioapply(
+ xfs_buf_t *bp)
{
int i, rw, map_i, total_nr_pages, nr_pages;
struct bio *bio;
- int offset = pb->pb_offset;
- int size = pb->pb_count_desired;
- sector_t sector = pb->pb_bn;
- unsigned int blocksize = pb->pb_target->pbr_bsize;
- int locking = _pagebuf_iolocked(pb);
+ int offset = bp->b_offset;
+ int size = bp->b_count_desired;
+ sector_t sector = bp->b_bn;
+ unsigned int blocksize = bp->b_target->bt_bsize;
+ int locking = _xfs_buf_iolocked(bp);
- total_nr_pages = pb->pb_page_count;
+ total_nr_pages = bp->b_page_count;
map_i = 0;
- if (pb->pb_flags & _PBF_RUN_QUEUES) {
- pb->pb_flags &= ~_PBF_RUN_QUEUES;
- rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
+ if (bp->b_flags & _XBF_RUN_QUEUES) {
+ bp->b_flags &= ~_XBF_RUN_QUEUES;
+ rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
} else {
- rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
+ rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
}
- if (pb->pb_flags & PBF_ORDERED) {
- ASSERT(!(pb->pb_flags & PBF_READ));
+ if (bp->b_flags & XBF_ORDERED) {
+ ASSERT(!(bp->b_flags & XBF_READ));
rw = WRITE_BARRIER;
}
- /* Special code path for reading a sub page size pagebuf in --
+ /* Special code path for reading a sub page size buffer in --
* we populate the whole page, and hence the other metadata
* in the same page. This optimization is only valid when the
- * filesystem block size and the page size are equal.
+ * filesystem block size is not smaller than the page size.
*/
- if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
- (pb->pb_flags & PBF_READ) && locking &&
- (blocksize == PAGE_CACHE_SIZE)) {
+ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
+ (bp->b_flags & XBF_READ) && locking &&
+ (blocksize >= PAGE_CACHE_SIZE)) {
bio = bio_alloc(GFP_NOIO, 1);
- bio->bi_bdev = pb->pb_target->pbr_bdev;
+ bio->bi_bdev = bp->b_target->bt_bdev;
bio->bi_sector = sector - (offset >> BBSHIFT);
- bio->bi_end_io = bio_end_io_pagebuf;
- bio->bi_private = pb;
+ bio->bi_end_io = xfs_buf_bio_end_io;
+ bio->bi_private = bp;
- bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
+ bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
size = 0;
- atomic_inc(&pb->pb_io_remaining);
+ atomic_inc(&bp->b_io_remaining);
goto submit_io;
}
/* Lock down the pages which we need to for the request */
- if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
+ if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
for (i = 0; size; i++) {
int nbytes = PAGE_CACHE_SIZE - offset;
- struct page *page = pb->pb_pages[i];
+ struct page *page = bp->b_pages[i];
if (nbytes > size)
nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
size -= nbytes;
offset = 0;
}
- offset = pb->pb_offset;
- size = pb->pb_count_desired;
+ offset = bp->b_offset;
+ size = bp->b_count_desired;
}
next_chunk:
- atomic_inc(&pb->pb_io_remaining);
+ atomic_inc(&bp->b_io_remaining);
nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
if (nr_pages > total_nr_pages)
nr_pages = total_nr_pages;
bio = bio_alloc(GFP_NOIO, nr_pages);
- bio->bi_bdev = pb->pb_target->pbr_bdev;
+ bio->bi_bdev = bp->b_target->bt_bdev;
bio->bi_sector = sector;
- bio->bi_end_io = bio_end_io_pagebuf;
- bio->bi_private = pb;
+ bio->bi_end_io = xfs_buf_bio_end_io;
+ bio->bi_private = bp;
for (; size && nr_pages; nr_pages--, map_i++) {
- int nbytes = PAGE_CACHE_SIZE - offset;
+ int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
if (nbytes > size)
nbytes = size;
- if (bio_add_page(bio, pb->pb_pages[map_i],
- nbytes, offset) < nbytes)
+ rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+ if (rbytes < nbytes)
break;
offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
goto next_chunk;
} else {
bio_put(bio);
- pagebuf_ioerror(pb, EIO);
+ xfs_buf_ioerror(bp, EIO);
}
}
-/*
- * pagebuf_iorequest -- the core I/O request routine.
- */
int
-pagebuf_iorequest( /* start real I/O */
- xfs_buf_t *pb) /* buffer to convey to device */
+xfs_buf_iorequest(
+ xfs_buf_t *bp)
{
- PB_TRACE(pb, "iorequest", 0);
+ XB_TRACE(bp, "iorequest", 0);
- if (pb->pb_flags & PBF_DELWRI) {
- pagebuf_delwri_queue(pb, 1);
+ if (bp->b_flags & XBF_DELWRI) {
+ xfs_buf_delwri_queue(bp, 1);
return 0;
}
- if (pb->pb_flags & PBF_WRITE) {
- _pagebuf_wait_unpin(pb);
+ if (bp->b_flags & XBF_WRITE) {
+ xfs_buf_wait_unpin(bp);
}
- pagebuf_hold(pb);
+ xfs_buf_hold(bp);
/* Set the count to 1 initially, this will stop an I/O
* completion callout which happens before we have started
- * all the I/O from calling pagebuf_iodone too early.
+ * all the I/O from calling xfs_buf_ioend too early.
*/
- atomic_set(&pb->pb_io_remaining, 1);
- _pagebuf_ioapply(pb);
- _pagebuf_iodone(pb, 0);
+ atomic_set(&bp->b_io_remaining, 1);
+ _xfs_buf_ioapply(bp);
+ _xfs_buf_ioend(bp, 0);
- pagebuf_rele(pb);
+ xfs_buf_rele(bp);
return 0;
}
/*
- * pagebuf_iowait
- *
- * pagebuf_iowait waits for I/O to complete on the buffer supplied.
- * It returns immediately if no I/O is pending. In any case, it returns
- * the error code, if any, or 0 if there is no error.
+ * Waits for I/O to complete on the buffer supplied.
+ * It returns immediately if no I/O is pending.
+ * It returns the I/O error code, if any, or 0 if there was no error.
*/
int
-pagebuf_iowait(
- xfs_buf_t *pb)
+xfs_buf_iowait(
+ xfs_buf_t *bp)
{
- PB_TRACE(pb, "iowait", 0);
- if (atomic_read(&pb->pb_io_remaining))
- blk_run_address_space(pb->pb_target->pbr_mapping);
- down(&pb->pb_iodonesema);
- PB_TRACE(pb, "iowaited", (long)pb->pb_error);
- return pb->pb_error;
+ XB_TRACE(bp, "iowait", 0);
+ if (atomic_read(&bp->b_io_remaining))
+ blk_run_address_space(bp->b_target->bt_mapping);
+ down(&bp->b_iodonesema);
+ XB_TRACE(bp, "iowaited", (long)bp->b_error);
+ return bp->b_error;
}
-caddr_t
-pagebuf_offset(
- xfs_buf_t *pb,
+xfs_caddr_t
+xfs_buf_offset(
+ xfs_buf_t *bp,
size_t offset)
{
struct page *page;
- offset += pb->pb_offset;
+ if (bp->b_flags & XBF_MAPPED)
+ return XFS_BUF_PTR(bp) + offset;
- page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
- return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
+ offset += bp->b_offset;
+ page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
+ return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
}
/*
- * pagebuf_iomove
- *
* Move data into or out of a buffer.
*/
void
-pagebuf_iomove(
- xfs_buf_t *pb, /* buffer to process */
+xfs_buf_iomove(
+ xfs_buf_t *bp, /* buffer to process */
size_t boff, /* starting buffer offset */
size_t bsize, /* length to copy */
caddr_t data, /* data address */
- page_buf_rw_t mode) /* read/write flag */
+ xfs_buf_rw_t mode) /* read/write/zero flag */
{
size_t bend, cpoff, csize;
struct page *page;
bend = boff + bsize;
while (boff < bend) {
- page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
- cpoff = page_buf_poff(boff + pb->pb_offset);
+ page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
+ cpoff = xfs_buf_poff(boff + bp->b_offset);
csize = min_t(size_t,
- PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
+ PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
switch (mode) {
- case PBRW_ZERO:
+ case XBRW_ZERO:
memset(page_address(page) + cpoff, 0, csize);
break;
- case PBRW_READ:
+ case XBRW_READ:
memcpy(data, page_address(page) + cpoff, csize);
break;
- case PBRW_WRITE:
+ case XBRW_WRITE:
memcpy(page_address(page) + cpoff, data, csize);
}
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
}
/*
- * Handling of buftargs.
+ * Handling of buffer targets (buftargs).
*/
/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but
+ * have not yet returned... walk the hash list for the target.
*/
void
xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
hash = &btp->bt_hash[i];
again:
spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
- ASSERT(btp == bp->pb_target);
- if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+ list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
+ ASSERT(btp == bp->b_target);
+ if (!(bp->b_flags & XBF_FS_MANAGED)) {
spin_unlock(&hash->bh_lock);
/*
* Catch superblock reference count leaks
* immediately
*/
- BUG_ON(bp->pb_bn == 0);
+ BUG_ON(bp->b_bn == 0);
delay(100);
goto again;
}
@@ -1462,9 +1378,9 @@ again:
}
/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
+ * Allocate buffer hash table for a given target.
+ * For devices containing metadata (i.e. not the log/realtime devices)
+ * we need to allocate a much larger hash table.
*/
STATIC void
xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
xfs_free_bufhash(
xfs_buftarg_t *btp)
{
- kmem_free(btp->bt_hash,
- (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
btp->bt_hash = NULL;
}
+/*
+ * buftarg list for delwrite queue processing
+ */
+STATIC LIST_HEAD(xfs_buftarg_list);
+STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+
+STATIC void
+xfs_register_buftarg(
+ xfs_buftarg_t *btp)
+{
+ spin_lock(&xfs_buftarg_lock);
+ list_add(&btp->bt_list, &xfs_buftarg_list);
+ spin_unlock(&xfs_buftarg_lock);
+}
+
+STATIC void
+xfs_unregister_buftarg(
+ xfs_buftarg_t *btp)
+{
+ spin_lock(&xfs_buftarg_lock);
+ list_del(&btp->bt_list);
+ spin_unlock(&xfs_buftarg_lock);
+}
+
void
xfs_free_buftarg(
xfs_buftarg_t *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
{
xfs_flush_buftarg(btp, 1);
if (external)
- xfs_blkdev_put(btp->pbr_bdev);
+ xfs_blkdev_put(btp->bt_bdev);
xfs_free_bufhash(btp);
- iput(btp->pbr_mapping->host);
+ iput(btp->bt_mapping->host);
+
+ /* Unregister the buftarg first so that we don't get a
+ * wakeup finding a non-existent task
+ */
+ xfs_unregister_buftarg(btp);
+ kthread_stop(btp->bt_task);
+
kmem_free(btp, sizeof(*btp));
}
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
unsigned int sectorsize,
int verbose)
{
- btp->pbr_bsize = blocksize;
- btp->pbr_sshift = ffs(sectorsize) - 1;
- btp->pbr_smask = sectorsize - 1;
+ btp->bt_bsize = blocksize;
+ btp->bt_sshift = ffs(sectorsize) - 1;
+ btp->bt_smask = sectorsize - 1;
- if (set_blocksize(btp->pbr_bdev, sectorsize)) {
+ if (set_blocksize(btp->bt_bdev, sectorsize)) {
printk(KERN_WARNING
"XFS: Cannot set_blocksize to %u on device %s\n",
sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
}
/*
-* When allocating the initial buffer target we have not yet
-* read in the superblock, so don't know what sized sectors
-* are being used is at this early stage. Play safe.
-*/
+ * When allocating the initial buffer target we have not yet
+ * read in the superblock, so don't know what sized sectors
+ * are being used at this early stage. Play safe.
+ */
STATIC int
xfs_setsize_buftarg_early(
xfs_buftarg_t *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
mapping->a_ops = &mapping_aops;
mapping->backing_dev_info = bdi;
mapping_set_gfp_mask(mapping, GFP_NOFS);
- btp->pbr_mapping = mapping;
+ btp->bt_mapping = mapping;
return 0;
}
+STATIC int
+xfs_alloc_delwrite_queue(
+ xfs_buftarg_t *btp)
+{
+ int error = 0;
+
+ INIT_LIST_HEAD(&btp->bt_list);
+ INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+ spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+ btp->bt_flags = 0;
+ btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+ if (IS_ERR(btp->bt_task)) {
+ error = PTR_ERR(btp->bt_task);
+ goto out_error;
+ }
+ xfs_register_buftarg(btp);
+out_error:
+ return error;
+}
+
xfs_buftarg_t *
xfs_alloc_buftarg(
struct block_device *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
- btp->pbr_dev = bdev->bd_dev;
- btp->pbr_bdev = bdev;
+ btp->bt_dev = bdev->bd_dev;
+ btp->bt_bdev = bdev;
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
+ if (xfs_alloc_delwrite_queue(btp))
+ goto error;
xfs_alloc_bufhash(btp, external);
return btp;
@@ -1616,83 +1584,81 @@ error:
/*
- * Pagebuf delayed write buffer handling
+ * Delayed write buffer handling
*/
-
-STATIC LIST_HEAD(pbd_delwrite_queue);
-STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
-
STATIC void
-pagebuf_delwri_queue(
- xfs_buf_t *pb,
+xfs_buf_delwri_queue(
+ xfs_buf_t *bp,
int unlock)
{
- PB_TRACE(pb, "delwri_q", (long)unlock);
- ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
- (PBF_DELWRI|PBF_ASYNC));
+ struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
+
+ XB_TRACE(bp, "delwri_q", (long)unlock);
+ ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
- spin_lock(&pbd_delwrite_lock);
+ spin_lock(dwlk);
/* If already in the queue, dequeue and place at tail */
- if (!list_empty(&pb->pb_list)) {
- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
- if (unlock) {
- atomic_dec(&pb->pb_hold);
- }
- list_del(&pb->pb_list);
+ if (!list_empty(&bp->b_list)) {
+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+ if (unlock)
+ atomic_dec(&bp->b_hold);
+ list_del(&bp->b_list);
}
- pb->pb_flags |= _PBF_DELWRI_Q;
- list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
- pb->pb_queuetime = jiffies;
- spin_unlock(&pbd_delwrite_lock);
+ bp->b_flags |= _XBF_DELWRI_Q;
+ list_add_tail(&bp->b_list, dwq);
+ bp->b_queuetime = jiffies;
+ spin_unlock(dwlk);
if (unlock)
- pagebuf_unlock(pb);
+ xfs_buf_unlock(bp);
}
void
-pagebuf_delwri_dequeue(
- xfs_buf_t *pb)
+xfs_buf_delwri_dequeue(
+ xfs_buf_t *bp)
{
+ spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
int dequeued = 0;
- spin_lock(&pbd_delwrite_lock);
- if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
- ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
- list_del_init(&pb->pb_list);
+ spin_lock(dwlk);
+ if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
+ ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+ list_del_init(&bp->b_list);
dequeued = 1;
}
- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
- spin_unlock(&pbd_delwrite_lock);
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+ spin_unlock(dwlk);
if (dequeued)
- pagebuf_rele(pb);
+ xfs_buf_rele(bp);
- PB_TRACE(pb, "delwri_dq", (long)dequeued);
+ XB_TRACE(bp, "delwri_dq", (long)dequeued);
}
STATIC void
-pagebuf_runall_queues(
+xfs_buf_runall_queues(
struct workqueue_struct *queue)
{
flush_workqueue(queue);
}
-/* Defines for pagebuf daemon */
-STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_force_flush;
-STATIC int xfsbufd_force_sleep;
-
STATIC int
xfsbufd_wakeup(
int priority,
gfp_t mask)
{
- if (xfsbufd_force_sleep)
- return 0;
- xfsbufd_force_flush = 1;
- barrier();
- wake_up_process(xfsbufd_task);
+ xfs_buftarg_t *btp;
+
+ spin_lock(&xfs_buftarg_lock);
+ list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
+ if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
+ continue;
+ set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
+ wake_up_process(btp->bt_task);
+ }
+ spin_unlock(&xfs_buftarg_lock);
return 0;
}
@@ -1702,67 +1668,70 @@ xfsbufd(
{
struct list_head tmp;
unsigned long age;
- xfs_buftarg_t *target;
- xfs_buf_t *pb, *n;
+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;
+ xfs_buf_t *bp, *n;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
current->flags |= PF_MEMALLOC;
INIT_LIST_HEAD(&tmp);
do {
if (unlikely(freezing(current))) {
- xfsbufd_force_sleep = 1;
+ set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
refrigerator();
} else {
- xfsbufd_force_sleep = 0;
+ clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
}
schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10));
age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- spin_lock(&pbd_delwrite_lock);
- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
- PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
- ASSERT(pb->pb_flags & PBF_DELWRI);
-
- if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
- if (!xfsbufd_force_flush &&
+ spin_lock(dwlk);
+ list_for_each_entry_safe(bp, n, dwq, b_list) {
+ XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
+ ASSERT(bp->b_flags & XBF_DELWRI);
+
+ if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!test_bit(XBT_FORCE_FLUSH,
+ &target->bt_flags) &&
time_before(jiffies,
- pb->pb_queuetime + age)) {
- pagebuf_unlock(pb);
+ bp->b_queuetime + age)) {
+ xfs_buf_unlock(bp);
break;
}
- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
- pb->pb_flags |= PBF_WRITE;
- list_move(&pb->pb_list, &tmp);
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+ bp->b_flags |= XBF_WRITE;
+ list_move(&bp->b_list, &tmp);
}
}
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
while (!list_empty(&tmp)) {
- pb = list_entry(tmp.next, xfs_buf_t, pb_list);
- target = pb->pb_target;
+ bp = list_entry(tmp.next, xfs_buf_t, b_list);
+ ASSERT(target == bp->b_target);
- list_del_init(&pb->pb_list);
- pagebuf_iostrategy(pb);
+ list_del_init(&bp->b_list);
+ xfs_buf_iostrategy(bp);
- blk_run_address_space(target->pbr_mapping);
+ blk_run_address_space(target->bt_mapping);
}
if (as_list_len > 0)
purge_addresses();
- xfsbufd_force_flush = 0;
+ clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop());
return 0;
}
/*
- * Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
+ * Go through all incore buffers, and release buffers if they belong to
+ * the given device. This is used in filesystem error handling to
+ * preserve the consistency of its metadata.
*/
int
xfs_flush_buftarg(
@@ -1770,73 +1739,72 @@ xfs_flush_buftarg(
int wait)
{
struct list_head tmp;
- xfs_buf_t *pb, *n;
+ xfs_buf_t *bp, *n;
int pincount = 0;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
- pagebuf_runall_queues(xfsdatad_workqueue);
- pagebuf_runall_queues(xfslogd_workqueue);
+ xfs_buf_runall_queues(xfsdatad_workqueue);
+ xfs_buf_runall_queues(xfslogd_workqueue);
INIT_LIST_HEAD(&tmp);
- spin_lock(&pbd_delwrite_lock);
- list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
-
- if (pb->pb_target != target)
- continue;
-
- ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
- PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
- if (pagebuf_ispin(pb)) {
+ spin_lock(dwlk);
+ list_for_each_entry_safe(bp, n, dwq, b_list) {
+ ASSERT(bp->b_target == target);
+ ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
+ XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
+ if (xfs_buf_ispin(bp)) {
pincount++;
continue;
}
- list_move(&pb->pb_list, &tmp);
+ list_move(&bp->b_list, &tmp);
}
- spin_unlock(&pbd_delwrite_lock);
+ spin_unlock(dwlk);
/*
* Dropped the delayed write list lock, now walk the temporary list
*/
- list_for_each_entry_safe(pb, n, &tmp, pb_list) {
- pagebuf_lock(pb);
- pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
- pb->pb_flags |= PBF_WRITE;
+ list_for_each_entry_safe(bp, n, &tmp, b_list) {
+ xfs_buf_lock(bp);
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+ bp->b_flags |= XBF_WRITE;
if (wait)
- pb->pb_flags &= ~PBF_ASYNC;
+ bp->b_flags &= ~XBF_ASYNC;
else
- list_del_init(&pb->pb_list);
+ list_del_init(&bp->b_list);
- pagebuf_iostrategy(pb);
+ xfs_buf_iostrategy(bp);
}
/*
* Remaining list items must be flushed before returning
*/
while (!list_empty(&tmp)) {
- pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+ bp = list_entry(tmp.next, xfs_buf_t, b_list);
- list_del_init(&pb->pb_list);
- xfs_iowait(pb);
- xfs_buf_relse(pb);
+ list_del_init(&bp->b_list);
+ xfs_iowait(bp);
+ xfs_buf_relse(bp);
}
if (wait)
- blk_run_address_space(target->pbr_mapping);
+ blk_run_address_space(target->bt_mapping);
return pincount;
}
int __init
-pagebuf_init(void)
+xfs_buf_init(void)
{
int error = -ENOMEM;
-#ifdef PAGEBUF_TRACE
- pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+#ifdef XFS_BUF_TRACE
+ xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
#endif
- pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
- if (!pagebuf_zone)
+ xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+ if (!xfs_buf_zone)
goto out_free_trace_buf;
xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
- if (IS_ERR(xfsbufd_task)) {
- error = PTR_ERR(xfsbufd_task);
+ xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
+ if (!xfs_buf_shake)
goto out_destroy_xfsdatad_workqueue;
- }
-
- pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
- if (!pagebuf_shake)
- goto out_stop_xfsbufd;
return 0;
- out_stop_xfsbufd:
- kthread_stop(xfsbufd_task);
out_destroy_xfsdatad_workqueue:
destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
- kmem_zone_destroy(pagebuf_zone);
+ kmem_zone_destroy(xfs_buf_zone);
out_free_trace_buf:
-#ifdef PAGEBUF_TRACE
- ktrace_free(pagebuf_trace_buf);
+#ifdef XFS_BUF_TRACE
+ ktrace_free(xfs_buf_trace_buf);
#endif
return error;
}
void
-pagebuf_terminate(void)
+xfs_buf_terminate(void) /* releases what xfs_buf_init set up: shaker, both workqueues, buf zone, trace buffer */
{
- kmem_shake_deregister(pagebuf_shake);
- kthread_stop(xfsbufd_task);
+ kmem_shake_deregister(xfs_buf_shake); /* note: this function no longer stops an xfsbufd thread */
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
- kmem_zone_destroy(pagebuf_zone);
-#ifdef PAGEBUF_TRACE
- ktrace_free(pagebuf_trace_buf);
+ kmem_zone_destroy(xfs_buf_zone);
+#ifdef XFS_BUF_TRACE
+ ktrace_free(xfs_buf_trace_buf); /* trace buffer exists only when XFS_BUF_TRACE is defined */
#endif
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d..4dd6592d5a4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -32,44 +32,47 @@
* Base types
*/
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
-#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
-#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
-#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum page_buf_rw_e {
- PBRW_READ = 1, /* transfer into target memory */
- PBRW_WRITE = 2, /* transfer from target memory */
- PBRW_ZERO = 3 /* Zero target memory */
-} page_buf_rw_t;
-
-
-typedef enum page_buf_flags_e { /* pb_flags values */
- PBF_READ = (1 << 0), /* buffer intended for reading from device */
- PBF_WRITE = (1 << 1), /* buffer intended for writing to device */
- PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */
- PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
- PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
- PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
- PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
- PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
- PBF_ORDERED = (1 << 11), /* use ordered writes */
- PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
+#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+ XBRW_READ = 1, /* transfer into target memory */
+ XBRW_WRITE = 2, /* transfer from target memory */
+ XBRW_ZERO = 3, /* Zero target memory */
+} xfs_buf_rw_t;
+
+typedef enum {
+ XBF_READ = (1 << 0), /* buffer intended for reading from device */
+ XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
+ XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
+ XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
+ XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
+ XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
+ XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
+ XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
+ XBF_ORDERED = (1 << 11), /* use ordered writes */
+ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
/* flags used only as arguments to access routines */
- PBF_LOCK = (1 << 14), /* lock requested */
- PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
- PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
+ XBF_LOCK = (1 << 14), /* lock requested */
+ XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
+ XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
/* flags used only internally */
- _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
- _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
- _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
- _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
-} page_buf_flags_t;
+ _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
+ _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
+ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
+ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
+} xfs_buf_flags_t;
+typedef enum { /* bit numbers (not masks) for set_bit()/test_bit()/clear_bit() on bt_flags */
+ XBT_FORCE_SLEEP = 0, /* set by xfsbufd while freezing; xfsbufd_wakeup skips such targets */
+ XBT_FORCE_FLUSH = 1, /* ask xfsbufd to write queued buffers regardless of their age */
+} xfs_buftarg_flags_t;
typedef struct xfs_bufhash {
struct list_head bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
} xfs_bufhash_t;
typedef struct xfs_buftarg {
- dev_t pbr_dev;
- struct block_device *pbr_bdev;
- struct address_space *pbr_mapping;
- unsigned int pbr_bsize;
- unsigned int pbr_sshift;
- size_t pbr_smask;
-
- /* per-device buffer hash table */
+ dev_t bt_dev;
+ struct block_device *bt_bdev; /* named via bdevname() in xfs_buf_target_name() */
+ struct address_space *bt_mapping; /* handed to blk_run_address_space() to kick queued I/O */
+ unsigned int bt_bsize;
+ unsigned int bt_sshift;
+ size_t bt_smask;
+
+ /* per device buffer hash table */
uint bt_hashmask;
uint bt_hashshift;
xfs_bufhash_t *bt_hash;
+
+ /* per device delwri queue */
+ struct task_struct *bt_task; /* task woken by xfsbufd_wakeup() */
+ struct list_head bt_list; /* linkage on xfs_buftarg_list */
+ struct list_head bt_delwrite_queue; /* delayed-write buffers, linked through b_list */
+ spinlock_t bt_delwrite_lock; /* taken around bt_delwrite_queue updates */
+ unsigned long bt_flags; /* XBT_* bits, accessed with set_bit()/test_bit()/clear_bit() */
} xfs_buftarg_t;
/*
- * xfs_buf_t: Buffer structure for page cache-based buffers
+ * xfs_buf_t: Buffer structure for pagecache-based buffers
+ *
+ * This buffer structure is used by the pagecache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer.
*
- * This buffer structure is used by the page cache buffer management routines
- * to refer to an assembly of pages forming a logical buffer. The actual I/O
- * is performed with buffer_head structures, as required by drivers.
- *
- * The buffer structure is used on temporary basis only, and discarded when
- * released. The real data storage is recorded in the page cache. Metadata is
+ * The buffer structure is used on a temporary basis only, and discarded when
+ * released. The real data storage is recorded in the pagecache. Buffers are
* hashed to the block device on which the file system resides.
*/
struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
-typedef void (*page_buf_iodone_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
-typedef void (*page_buf_relse_t)(struct xfs_buf *);
-/* pre-write function */
-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
-
-#define PB_PAGES 2
+#define XB_PAGES 2
typedef struct xfs_buf {
- struct semaphore pb_sema; /* semaphore for lockables */
- unsigned long pb_queuetime; /* time buffer was queued */
- atomic_t pb_pin_count; /* pin count */
- wait_queue_head_t pb_waiters; /* unpin waiters */
- struct list_head pb_list;
- page_buf_flags_t pb_flags; /* status flags */
- struct list_head pb_hash_list; /* hash table list */
- xfs_bufhash_t *pb_hash; /* hash table list start */
- xfs_buftarg_t *pb_target; /* buffer target (device) */
- atomic_t pb_hold; /* reference count */
- xfs_daddr_t pb_bn; /* block number for I/O */
- loff_t pb_file_offset; /* offset in file */
- size_t pb_buffer_length; /* size of buffer in bytes */
- size_t pb_count_desired; /* desired transfer size */
- void *pb_addr; /* virtual address of buffer */
- struct work_struct pb_iodone_work;
- atomic_t pb_io_remaining;/* #outstanding I/O requests */
- page_buf_iodone_t pb_iodone; /* I/O completion function */
- page_buf_relse_t pb_relse; /* releasing function */
- page_buf_bdstrat_t pb_strat; /* pre-write function */
- struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */
- void *pb_fspriv;
- void *pb_fspriv2;
- void *pb_fspriv3;
- unsigned short pb_error; /* error code on I/O */
- unsigned short pb_locked; /* page array is locked */
- unsigned int pb_page_count; /* size of page array */
- unsigned int pb_offset; /* page offset in first page */
- struct page **pb_pages; /* array of page pointers */
- struct page *pb_page_array[PB_PAGES]; /* inline pages */
-#ifdef PAGEBUF_LOCK_TRACKING
- int pb_last_holder;
+ struct semaphore b_sema; /* semaphore for lockables */
+ unsigned long b_queuetime; /* time buffer was queued */
+ atomic_t b_pin_count; /* pin count */
+ wait_queue_head_t b_waiters; /* unpin waiters */
+ struct list_head b_list;
+ xfs_buf_flags_t b_flags; /* status flags */
+ struct list_head b_hash_list; /* hash table list */
+ xfs_bufhash_t *b_hash; /* hash table list start */
+ xfs_buftarg_t *b_target; /* buffer target (device) */
+ atomic_t b_hold; /* reference count */
+ xfs_daddr_t b_bn; /* block number for I/O */
+ xfs_off_t b_file_offset; /* offset in file */
+ size_t b_buffer_length;/* size of buffer in bytes */
+ size_t b_count_desired;/* desired transfer size */
+ void *b_addr; /* virtual address of buffer */
+ struct work_struct b_iodone_work;
+ atomic_t b_io_remaining; /* #outstanding I/O requests */
+ xfs_buf_iodone_t b_iodone; /* I/O completion function */
+ xfs_buf_relse_t b_relse; /* releasing function */
+ xfs_buf_bdstrat_t b_strat; /* pre-write function */
+ struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
+ void *b_fspriv;
+ void *b_fspriv2;
+ void *b_fspriv3;
+ unsigned short b_error; /* error code on I/O */
+ unsigned short b_locked; /* page array is locked */
+ unsigned int b_page_count; /* size of page array */
+ unsigned int b_offset; /* page offset in first page */
+ struct page **b_pages; /* array of page pointers */
+ struct page *b_page_array[XB_PAGES]; /* inline pages */
+#ifdef XFS_BUF_LOCK_TRACKING
+ int b_last_holder;
#endif
} xfs_buf_t;
/* Finding and Reading Buffers */
-
-extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */
- /* the block is in memory */
- xfs_buftarg_t *, /* inode for block */
- loff_t, /* starting offset of range */
- size_t, /* length of range */
- page_buf_flags_t, /* PBF_LOCK */
- xfs_buf_t *); /* newly allocated buffer */
-
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+ xfs_buf_flags_t, xfs_buf_t *);
#define xfs_incore(buftarg,blkno,len,lockit) \
- _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
- xfs_buftarg_t *, /* inode for buffer */
- loff_t, /* starting offset of range */
- size_t, /* length of range */
- page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
- /* PBF_ASYNC */
+ _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+ xfs_buf_flags_t);
#define xfs_buf_get(target, blkno, len, flags) \
- xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
-
-extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
- xfs_buftarg_t *, /* inode for buffer */
- loff_t, /* starting offset of range */
- size_t, /* length of range */
- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
+ xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+ xfs_buf_flags_t);
#define xfs_buf_read(target, blkno, len, flags) \
- xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
-
-extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
- /* no memory or disk address */
- size_t len,
- xfs_buftarg_t *); /* mount point "fake" inode */
-
-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
- /* without disk address */
- size_t len,
- xfs_buftarg_t *); /* mount point "fake" inode */
-
-extern int pagebuf_associate_memory(
- xfs_buf_t *,
- void *,
- size_t);
-
-extern void pagebuf_hold( /* increment reference count */
- xfs_buf_t *); /* buffer to hold */
+ xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
-extern void pagebuf_readahead( /* read ahead into cache */
- xfs_buftarg_t *, /* target for buffer (or NULL) */
- loff_t, /* starting offset of range */
- size_t, /* length of range */
- page_buf_flags_t); /* additional read flags */
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
+ xfs_buf_flags_t);
/* Releasing Buffers */
-
-extern void pagebuf_free( /* deallocate a buffer */
- xfs_buf_t *); /* buffer to deallocate */
-
-extern void pagebuf_rele( /* release hold on a buffer */
- xfs_buf_t *); /* buffer to release */
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
/* Locking and Unlocking Buffers */
-
-extern int pagebuf_cond_lock( /* lock buffer, if not locked */
- /* (returns -EBUSY if locked) */
- xfs_buf_t *); /* buffer to lock */
-
-extern int pagebuf_lock_value( /* return count on lock */
- xfs_buf_t *); /* buffer to check */
-
-extern int pagebuf_lock( /* lock buffer */
- xfs_buf_t *); /* buffer to lock */
-
-extern void pagebuf_unlock( /* unlock buffer */
- xfs_buf_t *); /* buffer to unlock */
+extern int xfs_buf_cond_lock(xfs_buf_t *);
+extern int xfs_buf_lock_value(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */
-
-extern void pagebuf_iodone( /* mark buffer I/O complete */
- xfs_buf_t *, /* buffer to mark */
- int); /* run completion locally, or in
- * a helper thread. */
-
-extern void pagebuf_ioerror( /* mark buffer in error (or not) */
- xfs_buf_t *, /* buffer to mark */
- int); /* error to store (0 if none) */
-
-extern int pagebuf_iostart( /* start I/O on a buffer */
- xfs_buf_t *, /* buffer to start */
- page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
- /* PBF_READ, PBF_WRITE, */
- /* PBF_DELWRI */
-
-extern int pagebuf_iorequest( /* start real I/O */
- xfs_buf_t *); /* buffer to convey to device */
-
-extern int pagebuf_iowait( /* wait for buffer I/O done */
- xfs_buf_t *); /* buffer to wait on */
-
-extern void pagebuf_iomove( /* move data in/out of pagebuf */
- xfs_buf_t *, /* buffer to manipulate */
- size_t, /* starting buffer offset */
- size_t, /* length in buffer */
- caddr_t, /* data pointer */
- page_buf_rw_t); /* direction */
-
-static inline int pagebuf_iostrategy(xfs_buf_t *pb)
+extern void xfs_buf_ioend(xfs_buf_t *, int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
+ xfs_buf_rw_t);
+
+static inline int xfs_buf_iostrategy(xfs_buf_t *bp) /* start I/O through the buffer's b_strat hook when one is set */
{
- return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+ return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); /* no hook: fall back to xfs_buf_iorequest() */
}
-static inline int pagebuf_geterror(xfs_buf_t *pb)
+static inline int xfs_buf_geterror(xfs_buf_t *bp) /* returns bp's stored b_error; tolerates a NULL bp */
{
- return pb ? pb->pb_error : ENOMEM;
+ return bp ? bp->b_error : ENOMEM; /* a NULL bp is reported as positive ENOMEM, not -ENOMEM */
}
/* Buffer Utility Routines */
-
-extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
- xfs_buf_t *, /* buffer to offset into */
- size_t); /* offset */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
/* Pinning Buffer Storage in Memory */
-
-extern void pagebuf_pin( /* pin buffer in memory */
- xfs_buf_t *); /* buffer to pin */
-
-extern void pagebuf_unpin( /* unpin buffered data */
- xfs_buf_t *); /* buffer to unpin */
-
-extern int pagebuf_ispin( /* check if buffer is pinned */
- xfs_buf_t *); /* buffer to check */
+extern void xfs_buf_pin(xfs_buf_t *);
+extern void xfs_buf_unpin(xfs_buf_t *);
+extern int xfs_buf_ispin(xfs_buf_t *);
/* Delayed Write Buffer Routines */
-
-extern void pagebuf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
/* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
-extern int pagebuf_init(void);
-extern void pagebuf_terminate(void);
-
-
-#ifdef PAGEBUF_TRACE
-extern ktrace_t *pagebuf_trace_buf;
-extern void pagebuf_trace(
- xfs_buf_t *, /* buffer being traced */
- char *, /* description of operation */
- void *, /* arbitrary diagnostic value */
- void *); /* return address */
+#ifdef XFS_BUF_TRACE
+extern ktrace_t *xfs_buf_trace_buf;
+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#else
-# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
+#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
#endif
-#define pagebuf_target_name(target) \
- ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+#define xfs_buf_target_name(target) \
+ ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
+#define XFS_B_ASYNC XBF_ASYNC
+#define XFS_B_DELWRI XBF_DELWRI
+#define XFS_B_READ XBF_READ
+#define XFS_B_WRITE XBF_WRITE
+#define XFS_B_STALE XBF_STALE
-/* These are just for xfs_syncsub... it sets an internal variable
- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
- */
-#define XFS_B_ASYNC PBF_ASYNC
-#define XFS_B_DELWRI PBF_DELWRI
-#define XFS_B_READ PBF_READ
-#define XFS_B_WRITE PBF_WRITE
-#define XFS_B_STALE PBF_STALE
-
-#define XFS_BUF_TRYLOCK PBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
-#define XFS_BUF_LOCK PBF_LOCK
-#define XFS_BUF_MAPPED PBF_MAPPED
-
-#define BUF_BUSY PBF_DONT_BLOCK
-
-#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
-#define XFS_BUF_ZEROFLAGS(x) \
- ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
-
-#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)
-#define XFS_BUF_SUPER_STALE(x) do { \
- XFS_BUF_STALE(x); \
- pagebuf_delwri_dequeue(x); \
- XFS_BUF_DONE(x); \
- } while (0)
+#define XFS_BUF_TRYLOCK XBF_TRYLOCK
+#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
+#define XFS_BUF_LOCK XBF_LOCK
+#define XFS_BUF_MAPPED XBF_MAPPED
-#define XFS_BUF_MANAGE PBF_FS_MANAGED
-#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)
-
-#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)
-#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)
-
-#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)
-#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)
-#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)
-
-#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE)
-#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE)
-#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE)
-
-#define XFS_BUF_BUSY(x) do { } while (0)
-#define XFS_BUF_UNBUSY(x) do { } while (0)
-#define XFS_BUF_ISBUSY(x) (1)
-
-#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
-#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
-#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
-
-#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
-#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
-#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)
-
-#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
-#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
-#define XFS_BUF_ISSHUT(x) (0)
-
-#define XFS_BUF_HOLD(x) pagebuf_hold(x)
-#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
-#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
-#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
-
-#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
-#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
-#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
-
-#define XFS_BUF_ISUNINITIAL(x) (0)
-#define XFS_BUF_UNUNINITIAL(x) (0)
-
-#define XFS_BUF_BP_ISMAPPED(bp) 1
-
-#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
-#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
- (buf)->pb_iodone = (func)
-#define XFS_BUF_CLR_IODONE_FUNC(buf) \
- (buf)->pb_iodone = NULL
-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
- (buf)->pb_strat = (func)
-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
- (buf)->pb_strat = NULL
-
-#define XFS_BUF_FSPRIVATE(buf, type) \
- ((type)(buf)->pb_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(buf, value) \
- (buf)->pb_fspriv = (void *)(value)
-#define XFS_BUF_FSPRIVATE2(buf, type) \
- ((type)(buf)->pb_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
- (buf)->pb_fspriv2 = (void *)(value)
-#define XFS_BUF_FSPRIVATE3(buf, type) \
- ((type)(buf)->pb_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
- (buf)->pb_fspriv3 = (void *)(value)
-#define XFS_BUF_SET_START(buf)
-
-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
- (buf)->pb_relse = (value)
-
-#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
-
-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
-{
- if (bp->pb_flags & PBF_MAPPED)
- return XFS_BUF_PTR(bp) + offset;
- return (xfs_caddr_t) pagebuf_offset(bp, offset);
-}
+#define BUF_BUSY XBF_DONT_BLOCK
+
+#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
+#define XFS_BUF_ZEROFLAGS(bp) \
+ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
+
+#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)
+#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)
+#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)
+#define XFS_BUF_SUPER_STALE(bp) do { \
+ XFS_BUF_STALE(bp); \
+ xfs_buf_delwri_dequeue(bp); \
+ XFS_BUF_DONE(bp); \
+ } while (0)
-#define XFS_BUF_SET_PTR(bp, val, count) \
- pagebuf_associate_memory(bp, val, count)
-#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
-#define XFS_BUF_SET_ADDR(bp, blk) \
- ((bp)->pb_bn = (xfs_daddr_t)(blk))
-#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off) \
- ((bp)->pb_file_offset = (off))
-#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt) \
- ((bp)->pb_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt) \
- ((bp)->pb_buffer_length = (cnt))
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
-#define XFS_BUF_SET_REF(bp, ref)
-
-#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp)
-
-#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
-
-/* setup the buffer target from a buftarg structure */
-#define XFS_BUF_SET_TARGET(bp, target) \
- (bp)->pb_target = (target)
-#define XFS_BUF_TARGET(bp) ((bp)->pb_target)
-#define XFS_BUFTARG_NAME(target) \
- pagebuf_target_name(target)
-
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
-#define XFS_BUF_SET_REF(bp, ref)
-
-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
+#define XFS_BUF_MANAGE XBF_FS_MANAGED
+#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
+
+#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)
+#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)
+#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0)
+
+#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_BUSY(bp) do { } while (0)
+#define XFS_BUF_UNBUSY(bp) do { } while (0)
+#define XFS_BUF_ISBUSY(bp) (1)
+
+#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
+#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
+#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
+
+#define XFS_BUF_SHUT(bp) do { } while (0)
+#define XFS_BUF_UNSHUT(bp) do { } while (0)
+#define XFS_BUF_ISSHUT(bp) (0)
+
+#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
+#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ISUNINITIAL(bp) (0)
+#define XFS_BUF_UNUNINITIAL(bp) (0)
+
+#define XFS_BUF_BP_ISMAPPED(bp) (1)
+
+#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
+#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
+#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
+
+#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
+#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
+#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
+#define XFS_BUF_SET_START(bp) do { } while (0)
+#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
+
+#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
+#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
+#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
+
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
+#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
+#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
+
+#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
+
+#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
+#define XFS_BUF_V_IODONESEMA(bp) up(&(bp)->b_iodonesema); /* arg parenthesized; trailing ';' kept for existing callers */
+
+#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
+#define XFS_BUF_TARGET(bp) ((bp)->b_target)
+#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
+
+static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) /* start an asynchronous write of bp */
{
- bp->pb_fspriv3 = mp;
- bp->pb_strat = xfs_bdstrat_cb;
- pagebuf_delwri_dequeue(bp);
- return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+ bp->b_fspriv3 = mp; /* mp is stashed untyped; presumably consumed by xfs_bdstrat_cb -- confirm */
+ bp->b_strat = xfs_bdstrat_cb;
+ xfs_buf_delwri_dequeue(bp); /* take bp off the delwri queue before issuing the write */
+ return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
}
-static inline void xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(xfs_buf_t *bp) /* unlock bp (unless a b_relse hook is set), then call xfs_buf_rele() */
{
- if (!bp->pb_relse)
- pagebuf_unlock(bp);
- pagebuf_rele(bp);
+ if (!bp->b_relse) /* with a hook installed the unlock is skipped here -- presumably left to the hook; confirm */
+ xfs_buf_unlock(bp);
+ xfs_buf_rele(bp);
}
-#define xfs_bpin(bp) pagebuf_pin(bp)
-#define xfs_bunpin(bp) pagebuf_unpin(bp)
+#define xfs_bpin(bp) xfs_buf_pin(bp)
+#define xfs_bunpin(bp) xfs_buf_unpin(bp)
#define xfs_buftrace(id, bp) \
- pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+ xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
-#define xfs_biodone(pb) \
- pagebuf_iodone(pb, 0)
+#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
-#define xfs_biomove(pb, off, len, data, rw) \
- pagebuf_iomove((pb), (off), (len), (data), \
- ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+#define xfs_biomove(bp, off, len, data, rw) \
+ xfs_buf_iomove((bp), (off), (len), (data), \
+ ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
-#define xfs_biozero(pb, off, len) \
- pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+#define xfs_biozero(bp, off, len) \
+ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-static inline int XFS_bwrite(xfs_buf_t *pb)
+static inline int XFS_bwrite(xfs_buf_t *bp)
{
- int iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+ int iowait = (bp->b_flags & XBF_ASYNC) == 0; /* nonzero when XBF_ASYNC is not set on bp */
int error = 0;
if (!iowait)
- pb->pb_flags |= _PBF_RUN_QUEUES;
+ bp->b_flags |= _XBF_RUN_QUEUES; /* only added on the async path */
- pagebuf_delwri_dequeue(pb);
- pagebuf_iostrategy(pb);
+ xfs_buf_delwri_dequeue(bp);
+ xfs_buf_iostrategy(bp);
if (iowait) {
- error = pagebuf_iowait(pb);
- xfs_buf_relse(pb);
+ error = xfs_buf_iowait(bp); /* only the waiting path can return non-zero */
+ xfs_buf_relse(bp); /* buffer is released here only on the waiting path */
}
return error;
}
-#define XFS_bdwrite(pb) \
- pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
+#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
{
- bp->pb_strat = xfs_bdstrat_cb;
- bp->pb_fspriv3 = mp;
-
- return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
+ bp->b_strat = xfs_bdstrat_cb; /* install xfs_bdstrat_cb as the buffer's b_strat hook */
+ bp->b_fspriv3 = mp; /* mp is stored untyped in the buffer's b_fspriv3 field */
+ return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); /* same flags as the XFS_bdwrite() macro */
}
-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
-#define xfs_iowait(pb) pagebuf_iowait(pb)
+#define xfs_iowait(bp) xfs_buf_iowait(bp)
#define xfs_baread(target, rablkno, ralen) \
- pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
-
-#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
-#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
-#define xfs_buf_free(bp) pagebuf_free(bp)
+ xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
/*
* Handling of buftargs.
*/
-
extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
extern void xfs_free_buftarg(xfs_buftarg_t *, int);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-#define xfs_getsize_buftarg(buftarg) \
- block_size((buftarg)->pbr_bdev)
-#define xfs_readonly_buftarg(buftarg) \
- bdev_read_only((buftarg)->pbr_bdev)
-#define xfs_binval(buftarg) \
- xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg) \
- xfs_flush_buftarg(buftarg, 1)
+#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 06111d0bbae..ced4404339c 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -509,16 +509,14 @@ linvfs_open_exec(
vnode_t *vp = LINVFS_GET_VP(inode);
xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
int error = 0;
- bhv_desc_t *bdp;
xfs_inode_t *ip;
if (vp->v_vfsp->vfs_flag & VFS_DMI) {
- bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
- if (!bdp) {
+ ip = xfs_vtoi(vp);
+ if (!ip) {
error = -EINVAL;
goto open_exec_out;
}
- ip = XFS_BHVTOI(bdp);
if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
0, 0, 0, NULL);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 21667ba6dcd..4db47790415 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -146,13 +146,10 @@ xfs_find_handle(
if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
xfs_inode_t *ip;
- bhv_desc_t *bhv;
int lock_mode;
/* need to get access to the xfs_inode to read the generation */
- bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
- ASSERT(bhv);
- ip = XFS_BHVTOI(bhv);
+ ip = xfs_vtoi(vp);
ASSERT(ip);
lock_mode = xfs_ilock_map_shared(ip);
@@ -751,9 +748,8 @@ xfs_ioctl(
(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
- /* The size dio will do in one go */
- da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
+ da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+ da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
if (copy_to_user(arg, &da, sizeof(da)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9b8ee3470ec..4bd3d03b23e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -54,11 +54,46 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
+#include <linux/security.h>
#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \
(S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))
/*
+ * Get an XFS inode from a given vnode.
+ */
+xfs_inode_t *
+xfs_vtoi(
+ struct vnode *vp) /* vnode whose behavior chain is searched */
+{
+ bhv_desc_t *bdp;
+
+ bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
+ VNODE_POSITION_XFS, VNODE_POSITION_XFS); /* both bounds are the XFS position */
+ if (unlikely(bdp == NULL))
+ return NULL; /* lookup found nothing: the result is NULL */
+ return XFS_BHVTOI(bdp);
+}
+
+/*
+ * Bring the atime in the XFS inode up to date.
+ * Used before logging the inode to disk or when the Linux inode goes away.
+ */
+void
+xfs_synchronize_atime(
+ xfs_inode_t *ip) /* XFS inode whose di_atime is refreshed */
+{
+ vnode_t *vp;
+
+ vp = XFS_ITOV_NULL(ip);
+ if (vp) { /* nothing is copied when XFS_ITOV_NULL() yields NULL */
+ struct inode *inode = &vp->v_inode;
+ ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; /* narrowed to __int32_t */
+ ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; /* narrowed to __int32_t */
+ }
+}
+
+/*
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
* we do record the fact that there is dirty information in core.
@@ -77,23 +112,6 @@ xfs_ichgtime(
struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
timespec_t tv;
- /*
- * We're not supposed to change timestamps in readonly-mounted
- * filesystems. Throw it away if anyone asks us.
- */
- if (unlikely(IS_RDONLY(inode)))
- return;
-
- /*
- * Don't update access timestamps on reads if mounted "noatime".
- * Throw it away if anyone asks us.
- */
- if (unlikely(
- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
- (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
- XFS_ICHGTIME_ACC))
- return;
-
nanotime(&tv);
if (flags & XFS_ICHGTIME_MOD) {
inode->i_mtime = tv;
@@ -130,8 +148,6 @@ xfs_ichgtime(
* Variant on the above which avoids querying the system clock
* in situations where we know the Linux inode timestamps have
* just been updated (and so we can update our inode cheaply).
- * We also skip the readonly and noatime checks here, they are
- * also catered for already.
*/
void
xfs_ichgtime_fast(
@@ -142,20 +158,16 @@ xfs_ichgtime_fast(
timespec_t *tvp;
/*
- * We're not supposed to change timestamps in readonly-mounted
- * filesystems. Throw it away if anyone asks us.
+ * Atime updates for read() & friends are handled lazily now, and
+ * explicit updates must go through xfs_ichgtime()
*/
- if (unlikely(IS_RDONLY(inode)))
- return;
+ ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
/*
- * Don't update access timestamps on reads if mounted "noatime".
- * Throw it away if anyone asks us.
+ * We're not supposed to change timestamps in readonly-mounted
+ * filesystems. Throw it away if anyone asks us.
*/
- if (unlikely(
- (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
- ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
- XFS_ICHGTIME_ACC)))
+ if (unlikely(IS_RDONLY(inode)))
return;
if (flags & XFS_ICHGTIME_MOD) {
@@ -163,11 +175,6 @@ xfs_ichgtime_fast(
ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
}
- if (flags & XFS_ICHGTIME_ACC) {
- tvp = &inode->i_atime;
- ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
- }
if (flags & XFS_ICHGTIME_CHG) {
tvp = &inode->i_ctime;
ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -214,6 +221,39 @@ validate_fields(
}
/*
+ * Hook in SELinux. This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+linvfs_init_security(
+ struct vnode *vp, /* vnode that receives the attribute */
+ struct inode *dir) /* passed through to security_inode_init_security() */
+{
+ struct inode *ip = LINVFS_GET_IP(vp);
+ size_t length;
+ void *value;
+ char *name;
+ int error;
+
+ error = security_inode_init_security(ip, dir, &name, &value, &length);
+ if (error) {
+ if (error == -EOPNOTSUPP)
+ return 0; /* -EOPNOTSUPP is treated as success */
+ return -error; /* sign is flipped before returning */
+ }
+
+ VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error); /* presumably stores its result in error -- confirm against the VOP macro */
+ if (!error)
+ VMODIFY(vp);
+
+ kfree(name); /* name and value are freed whether or not the set succeeded */
+ kfree(value);
+ return error;
+}
+
+/*
* Determine whether a process has a valid fs_struct (kernel daemons
* like knfsd don't have an fs_struct).
*
@@ -278,6 +318,9 @@ linvfs_mknod(
break;
}
+ if (!error)
+ error = linvfs_init_security(vp, dir);
+
if (default_acl) {
if (!error) {
error = _ACL_INHERIT(vp, &va, default_acl);
@@ -294,8 +337,6 @@ linvfs_mknod(
teardown.d_inode = ip = LINVFS_GET_IP(vp);
teardown.d_name = dentry->d_name;
- vn_mark_bad(vp);
-
if (S_ISDIR(mode))
VOP_RMDIR(dvp, &teardown, NULL, err2);
else
@@ -506,7 +547,7 @@ linvfs_follow_link(
ASSERT(dentry);
ASSERT(nd);
- link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+ link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link) {
nd_set_link(nd, ERR_PTR(-ENOMEM));
return NULL;
@@ -522,12 +563,12 @@ linvfs_follow_link(
vp = LINVFS_GET_VP(dentry->d_inode);
iov.iov_base = link;
- iov.iov_len = MAXNAMELEN;
+ iov.iov_len = MAXPATHLEN;
uio->uio_iov = &iov;
uio->uio_offset = 0;
uio->uio_segflg = UIO_SYSSPACE;
- uio->uio_resid = MAXNAMELEN;
+ uio->uio_resid = MAXPATHLEN;
uio->uio_iovcnt = 1;
VOP_READLINK(vp, uio, 0, NULL, error);
@@ -535,7 +576,7 @@ linvfs_follow_link(
kfree(link);
link = ERR_PTR(-error);
} else {
- link[MAXNAMELEN - uio->uio_resid] = '\0';
+ link[MAXPATHLEN - uio->uio_resid] = '\0';
}
kfree(uio);
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ee784b63acb..6899a6b4a50 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
extern struct file_operations linvfs_invis_file_operations;
extern struct file_operations linvfs_dir_operations;
-extern struct address_space_operations linvfs_aops;
-
-extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void linvfs_unwritten_done(struct buffer_head *, int);
-
extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
int, unsigned int, void __user *);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d8e21ba0ccc..67389b74552 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -110,10 +110,6 @@
* delalloc and these ondisk-uninitialised buffers.
*/
BUFFER_FNS(PrivateStart, unwritten);
-static inline void set_buffer_unwritten_io(struct buffer_head *bh)
-{
- bh->b_end_io = linvfs_unwritten_done;
-}
#define restricted_chown xfs_params.restrict_chown.val
#define irix_sgid_inherit xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
#define xfs_itruncate_data(ip, off) \
(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
#define xfs_statvfs_fsid(statp, mp) \
- ({ u64 id = huge_encode_dev((mp)->m_dev); \
+ ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
__kernel_fsid_t *fsid = &(statp)->f_fsid; \
(fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 885dfafeabe..e0ab45fbfeb 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -233,8 +233,8 @@ xfs_read(
xfs_buftarg_t *target =
(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((*offset & target->pbr_smask) ||
- (size & target->pbr_smask)) {
+ if ((*offset & target->bt_smask) ||
+ (size & target->bt_smask)) {
if (*offset == ip->i_d.di_size) {
return (0);
}
@@ -281,9 +281,6 @@ xfs_read(
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- if (likely(!(ioflags & IO_INVIS)))
- xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
-
unlock_isem:
if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
@@ -346,9 +343,6 @@ xfs_sendfile(
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
- if (likely(!(ioflags & IO_INVIS)))
- xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
-
return ret;
}
@@ -362,7 +356,6 @@ STATIC int /* error (positive) */
xfs_zero_last_block(
struct inode *ip,
xfs_iocore_t *io,
- xfs_off_t offset,
xfs_fsize_t isize,
xfs_fsize_t end_size)
{
@@ -371,19 +364,16 @@ xfs_zero_last_block(
int nimaps;
int zero_offset;
int zero_len;
- int isize_fsb_offset;
int error = 0;
xfs_bmbt_irec_t imap;
loff_t loff;
- size_t lsize;
ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
- ASSERT(offset > isize);
mp = io->io_mount;
- isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
- if (isize_fsb_offset == 0) {
+ zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+ if (zero_offset == 0) {
/*
* There are no extra bytes in the last block on disk to
* zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
*/
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
loff = XFS_FSB_TO_B(mp, last_fsb);
- lsize = XFS_FSB_TO_B(mp, 1);
- zero_offset = isize_fsb_offset;
- zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
+ zero_len = mp->m_sb.sb_blocksize - zero_offset;
error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
@@ -447,20 +435,17 @@ xfs_zero_eof(
struct inode *ip = LINVFS_GET_IP(vp);
xfs_fileoff_t start_zero_fsb;
xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t prev_zero_fsb;
xfs_fileoff_t zero_count_fsb;
xfs_fileoff_t last_fsb;
xfs_extlen_t buf_len_fsb;
- xfs_extlen_t prev_zero_count;
xfs_mount_t *mp;
int nimaps;
int error = 0;
xfs_bmbt_irec_t imap;
- loff_t loff;
- size_t lsize;
ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+ ASSERT(offset > isize);
mp = io->io_mount;
@@ -468,7 +453,7 @@ xfs_zero_eof(
* First handle zeroing the block on which isize resides.
* We only zero a part of that block so it is handled specially.
*/
- error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+ error = xfs_zero_last_block(ip, io, isize, end_size);
if (error) {
ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
}
ASSERT(start_zero_fsb <= end_zero_fsb);
- prev_zero_fsb = NULLFILEOFF;
- prev_zero_count = 0;
while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
* that sits on a hole and sets the page as P_HOLE
* and calls remapf if it is a mapped file.
*/
- prev_zero_fsb = NULLFILEOFF;
- prev_zero_count = 0;
- start_zero_fsb = imap.br_startoff +
- imap.br_blockcount;
+ start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
continue;
}
@@ -543,17 +523,15 @@ xfs_zero_eof(
*/
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
- loff = XFS_FSB_TO_B(mp, start_zero_fsb);
- lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
-
- error = xfs_iozero(ip, loff, lsize, end_size);
+ error = xfs_iozero(ip,
+ XFS_FSB_TO_B(mp, start_zero_fsb),
+ XFS_FSB_TO_B(mp, buf_len_fsb),
+ end_size);
if (error) {
goto out_lock;
}
- prev_zero_fsb = start_zero_fsb;
- prev_zero_count = buf_len_fsb;
start_zero_fsb = imap.br_startoff + buf_len_fsb;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
@@ -640,7 +618,7 @@ xfs_write(
(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((pos & target->pbr_smask) || (count & target->pbr_smask))
+ if ((pos & target->bt_smask) || (count & target->bt_smask))
return XFS_ERROR(-EINVAL);
if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -831,6 +809,10 @@ retry:
goto retry;
}
+ isize = i_size_read(inode);
+ if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+ *offset = isize;
+
if (*offset > xip->i_d.di_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL);
if (*offset > xip->i_d.di_size) {
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
if (!XFS_FORCED_SHUTDOWN(mp)) {
- pagebuf_iorequest(bp);
+ xfs_buf_iorequest(bp);
return 0;
} else {
xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
* if (XFS_BUF_IS_GRIO(bp)) {
*/
- pagebuf_iorequest(bp);
+ xfs_buf_iorequest(bp);
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 6c40a74be7c..8955720a2c6 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -34,7 +34,7 @@ xfs_read_xfsstats(
__uint64_t xs_write_bytes = 0;
__uint64_t xs_read_bytes = 0;
- static struct xstats_entry {
+ static const struct xstats_entry {
char *desc;
int endpoint;
} xstats[] = {
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 50027c4a561..8ba7a2fa6c1 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -109,15 +109,15 @@ struct xfsstats {
__uint32_t vn_remove; /* # times vn_remove called */
__uint32_t vn_free; /* # times vn_free called */
#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
- __uint32_t pb_get;
- __uint32_t pb_create;
- __uint32_t pb_get_locked;
- __uint32_t pb_get_locked_waited;
- __uint32_t pb_busy_locked;
- __uint32_t pb_miss_locked;
- __uint32_t pb_page_retries;
- __uint32_t pb_page_found;
- __uint32_t pb_get_read;
+ __uint32_t xb_get;
+ __uint32_t xb_create;
+ __uint32_t xb_get_locked;
+ __uint32_t xb_get_locked_waited;
+ __uint32_t xb_busy_locked;
+ __uint32_t xb_miss_locked;
+ __uint32_t xb_page_retries;
+ __uint32_t xb_page_found;
+ __uint32_t xb_get_read;
/* Extra precision counters */
__uint64_t xs_xstrat_bytes;
__uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 6116b5bf433..f22e426d9e4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, not supported with external log device");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ return;
}
- if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+ if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
QUEUE_ORDERED_NONE) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, not supported by the underlying device");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ return;
}
error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, trial barrier write failed");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ return;
}
}
@@ -327,7 +330,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, NULL);
}
STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
- if (kthread_should_stop())
+ if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
break;
spin_lock(&vfsp->vfs_sync_lock);
@@ -966,9 +969,9 @@ init_xfs_fs( void )
if (error < 0)
goto undo_zones;
- error = pagebuf_init();
+ error = xfs_buf_init();
if (error < 0)
- goto undo_pagebuf;
+ goto undo_buffers;
vn_init();
xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
return 0;
undo_register:
- pagebuf_terminate();
+ xfs_buf_terminate();
-undo_pagebuf:
+undo_buffers:
linvfs_destroy_zones();
undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
XFS_DM_EXIT(&xfs_fs_type);
unregister_filesystem(&xfs_fs_type);
xfs_cleanup();
- pagebuf_terminate();
+ xfs_buf_terminate();
linvfs_destroy_zones();
ktrace_uninit();
}
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index e9bbcb4d624..260dd8415dd 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -106,7 +106,6 @@ vn_revalidate_core(
inode->i_blocks = vap->va_nblocks;
inode->i_mtime = vap->va_mtime;
inode->i_ctime = vap->va_ctime;
- inode->i_atime = vap->va_atime;
inode->i_blksize = vap->va_blocksize;
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f2bbb327c08..0fe2419461d 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp)
}
/*
+ * Extracting atime values in various formats
+ */
+static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
+{
+ bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec; /* fields are copied one by one */
+ bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
+}
+
+static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
+{
+ *ts = vp->v_inode.i_atime; /* whole-struct copy */
+}
+
+static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
+{
+ *tt = vp->v_inode.i_atime.tv_sec; /* seconds only; tv_nsec is not copied */
+}
+
+/*
* Some useful predicates.
*/
#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2f69822344e..2ec6b441849 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
* trying to duplicate our effort.
*/
ASSERT(qip->qli_pushbuf_flag != 0);
- ASSERT(qip->qli_push_owner == get_thread_id());
+ ASSERT(qip->qli_push_owner == current_pid());
/*
* If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
qip->qli_pushbuf_flag = 1;
ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
#ifdef DEBUG
- qip->qli_push_owner = get_thread_id();
+ qip->qli_push_owner = current_pid();
#endif
/*
* The dquot is left locked.
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index bb6991a7a61..7dcdd0640c3 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1392,11 +1392,12 @@ xfs_qm_qino_alloc(
{
xfs_trans_t *tp;
int error;
- unsigned long s;
+ unsigned long s;
cred_t zerocr;
+ xfs_inode_t zeroino;
int committed;
- tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
if ((error = xfs_trans_reserve(tp,
XFS_QM_QINOCREATE_SPACE_RES(mp),
XFS_CREATE_LOG_RES(mp), 0,
@@ -1406,8 +1407,9 @@ xfs_qm_qino_alloc(
return (error);
}
memset(&zerocr, 0, sizeof(zerocr));
+ memset(&zeroino, 0, sizeof(zeroino));
- if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
+ if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
&zerocr, 0, 1, ip, &committed))) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT);
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
* at this point (because we intentionally didn't in dqget_noattach).
*/
if (error) {
- xfs_qm_dqpurge_all(mp,
- XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
- XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
+ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
goto error_return;
}
/*
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
ip->i_udquot = udqp;
+ ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
ip->i_gdquot = gdqp;
- ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
+ ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
+ ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+ ip->i_d.di_gid : ip->i_d.di_projid) ==
+ be32_to_cpu(gdqp->q_core.d_id));
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
}
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index bb6dc91ea26..b08b3d9345b 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -27,45 +27,12 @@ static DEFINE_SPINLOCK(xfs_err_lock);
/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
#define XFS_MAX_ERR_LEVEL 7
#define XFS_ERR_MASK ((1 << 3) - 1)
-static char *err_level[XFS_MAX_ERR_LEVEL+1] =
+static const char * const err_level[XFS_MAX_ERR_LEVEL+1] =
{KERN_EMERG, KERN_ALERT, KERN_CRIT,
KERN_ERR, KERN_WARNING, KERN_NOTICE,
KERN_INFO, KERN_DEBUG};
void
-assfail(char *a, char *f, int l)
-{
- printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
- BUG();
-}
-
-#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
-
-unsigned long
-random(void)
-{
- static unsigned long RandomValue = 1;
- /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
- register long rv = RandomValue;
- register long lo;
- register long hi;
-
- hi = rv / 127773;
- lo = rv % 127773;
- rv = 16807 * lo - 2836 * hi;
- if( rv <= 0 ) rv += 2147483647;
- return( RandomValue = rv );
-}
-
-int
-get_thread_id(void)
-{
- return current->pid;
-}
-
-#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
-
-void
cmn_err(register int level, char *fmt, ...)
{
char *fp = fmt;
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
BUG();
}
-
void
icmn_err(register int level, char *fmt, va_list ap)
{
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
if (level == CE_PANIC)
BUG();
}
+
+void
+assfail(char *expr, char *file, int line)
+{
+ printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
+ BUG(); /* invoked unconditionally after the message is printed */
+}
+
+#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
+unsigned long random(void)
+{
+ static unsigned long RandomValue = 1; /* generator state, carried across calls */
+ /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
+ register long rv = RandomValue;
+ register long lo;
+ register long hi;
+
+ hi = rv / 127773;
+ lo = rv % 127773;
+ rv = 16807 * lo - 2836 * hi;
+ if (rv <= 0) rv += 2147483647; /* bring a non-positive result back into range */
+ return RandomValue = rv; /* store the new state and return it */
+}
+#endif /* (DEBUG || INDUCE_IO_ERRROR) && !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index aff558664c3..e3bf58112e7 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
__attribute__ ((format (printf, 2, 0)));
extern void cmn_err(int, char *, ...)
__attribute__ ((format (printf, 2, 3)));
+extern void assfail(char *expr, char *f, int l);
-#ifndef STATIC
-# define STATIC static
-#endif
+#define prdev(fmt,targ,args...) \
+ printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-#ifdef DEBUG
-# define ASSERT(EX) ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
-#else
-# define ASSERT(x) ((void)0)
-#endif
+#define ASSERT_ALWAYS(expr) \
+ (likely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) /* the assertion holding is the expected case */
-extern void assfail(char *, char *, int);
-#ifdef DEBUG
+#ifndef DEBUG
+# define ASSERT(expr) ((void)0)
+#else
+# define ASSERT(expr) ASSERT_ALWAYS(expr)
extern unsigned long random(void);
-extern int get_thread_id(void);
#endif
-#define ASSERT_ALWAYS(EX) ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__))
-#define debug_stop_all_cpus(param) /* param is "cpumask_t *" */
+#ifndef STATIC
+# define STATIC static
+#endif
#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 69ec4f540c3..a3d565a6773 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -27,6 +27,16 @@ uuid_init(void)
mutex_init(&uuid_monitor);
}
+
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+ __be32 uu_timelow; /* the only 32-bit field: decode with be32_to_cpu() */
+ __be16 uu_timemid; /* 16-bit big-endian */
+ __be16 uu_timehi; /* 16-bit big-endian */
+ __be16 uu_clockseq; /* 16-bit big-endian */
+ __be16 uu_node[3]; /* three 16-bit big-endian words */
+} xfs_uu_t;
+
/*
* uuid_getnodeuniq - obtain the node unique fields of a UUID.
*
@@ -36,16 +46,11 @@ uuid_init(void)
void
uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
{
- char *uu = (char *)uuid;
-
- /* on IRIX, this function assumes big-endian fields within
- * the uuid, so we use INT_GET to get the same result on
- * little-endian systems
- */
+ xfs_uu_t *uup = (xfs_uu_t *)uuid; /* view the uuid through its big-endian field layout */
- fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +
- INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);
- fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT);
+ fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+ be16_to_cpu(uup->uu_timemid);
+ fsid[1] = be32_to_cpu(uup->uu_timelow); /* uu_timelow is __be32: a 16-bit conversion would truncate it */
}
void
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 68e5051d8e2..c4836890b72 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,6 +40,22 @@
#undef XFS_NATIVE_HOST
#endif
+#ifdef XFS_NATIVE_HOST /* native host: conversions are plain casts */
+#define cpu_to_be16(val) ((__be16)(val))
+#define cpu_to_be32(val) ((__be32)(val))
+#define cpu_to_be64(val) ((__be64)(val))
+#define be16_to_cpu(val) ((__uint16_t)(val))
+#define be32_to_cpu(val) ((__uint32_t)(val))
+#define be64_to_cpu(val) ((__uint64_t)(val))
+#else /* !XFS_NATIVE_HOST: conversions byte-swap via __swab16/32/64 */
+#define cpu_to_be16(val) (__swab16((__uint16_t)(val)))
+#define cpu_to_be32(val) (__swab32((__uint32_t)(val)))
+#define cpu_to_be64(val) (__swab64((__uint64_t)(val)))
+#define be16_to_cpu(val) (__swab16((__be16)(val)))
+#define be32_to_cpu(val) (__swab32((__be32)(val)))
+#define be64_to_cpu(val) (__swab64((__be64)(val)))
+#endif /* XFS_NATIVE_HOST */
+
#endif /* __KERNEL__ */
/* do we need conversion? */
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
*/
#define XFS_GET_DIR_INO4(di) \
- (((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+ (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
#define XFS_PUT_DIR_INO4(from, di) \
do { \
@@ -197,9 +213,9 @@ do { \
} while (0)
#define XFS_DI_HI(di) \
- (((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+ (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
#define XFS_DI_LO(di) \
- (((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
+ (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
#define XFS_GET_DIR_INO8(di) \
(((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1c7421840c1..fe91eac4e2a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return (offset >= minforkoff) ? minforkoff : 0;
}
- if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+ if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
if (bytes <= XFS_IFORK_ASIZE(dp))
return mp->m_attroffset >> 3;
return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
{
unsigned long s;
- if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+ if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
!(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
s = XFS_SB_LOCK(mp);
if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
*/
totsize -= size;
if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
- !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+ (mp->m_flags & XFS_MOUNT_ATTR2)) {
/*
* Last attribute now removed, revert to original
* inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
ASSERT(dp->i_d.di_forkoff);
ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
- (mp->m_flags & XFS_MOUNT_COMPAT_ATTR));
+ !(mp->m_flags & XFS_MOUNT_ATTR2));
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
+ name_loc->namelen
+ INT_GET(name_loc->valuelen, ARCH_CONVERT);
}
- if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+ if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
(bytes == sizeof(struct xfs_attr_sf_hdr)))
return(-1);
return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
goto out;
if (forkoff == -1) {
- ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));
+ ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
/*
* Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f6143ff251a..541e34109bb 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -63,7 +63,7 @@ struct xfs_trans;
* the leaf_entry. The namespaces are independent only because we also look
* at the namespace bit when we are looking for a matching attribute name.
*
- * We also store a "incomplete" bit in the leaf_entry. It shows that an
+ * We also store an "incomplete" bit in the leaf_entry. It shows that an
* attribute is in the middle of being created and should not be shown to
* the user if we crash during the time that the bit is set. We clear the
* bit when we have finished setting up the attribute. We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
*/
#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
+typedef struct xfs_attr_leaf_map { /* RLE map entry: one run of free bytes */
+ __uint16_t base; /* base (start) of the free region */
+ __uint16_t size; /* length (run length) of the free region */
+} xfs_attr_leaf_map_t;
+
+typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
+ xfs_da_blkinfo_t info; /* block type, links, etc. */
+ __uint16_t count; /* number of active leaf entries */
+ __uint16_t usedbytes; /* num bytes of names/values stored */
+ __uint16_t firstused; /* first used byte in name area */
+ __uint8_t holes; /* != 0 if blk needs compaction */
+ __uint8_t pad1; /* pad byte */
+ xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
+ /* N largest free regions, N == XFS_ATTR_LEAF_MAPSIZE */
+} xfs_attr_leaf_hdr_t;
+
+typedef struct xfs_attr_leaf_entry { /* one per attribute; sorted on key (hashval), not name */
+ xfs_dahash_t hashval; /* hash value of name */
+ __uint16_t nameidx; /* index into buffer of name/value */
+ __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag bits */
+ __uint8_t pad2; /* unused pad byte */
+} xfs_attr_leaf_entry_t;
+
+typedef struct xfs_attr_leaf_name_local { /* name and value both stored in the leaf */
+ __uint16_t valuelen; /* number of bytes in value */
+ __uint8_t namelen; /* length of name bytes */
+ __uint8_t nameval[1]; /* name/value bytes; declared [1], real length presumably namelen + valuelen -- confirm */
+} xfs_attr_leaf_name_local_t;
+
+typedef struct xfs_attr_leaf_name_remote { /* name in the leaf, value stored in separate block(s) */
+ xfs_dablk_t valueblk; /* block number of value bytes */
+ __uint32_t valuelen; /* number of bytes in value */
+ __uint8_t namelen; /* length of name bytes */
+ __uint8_t name[1]; /* name bytes; declared [1], real length presumably namelen -- confirm */
+} xfs_attr_leaf_name_remote_t;
+
typedef struct xfs_attr_leafblock {
- struct xfs_attr_leaf_hdr { /* constant-structure header block */
- xfs_da_blkinfo_t info; /* block type, links, etc. */
- __uint16_t count; /* count of active leaf_entry's */
- __uint16_t usedbytes; /* num bytes of names/values stored */
- __uint16_t firstused; /* first used byte in name area */
- __uint8_t holes; /* != 0 if blk needs compaction */
- __uint8_t pad1;
- struct xfs_attr_leaf_map { /* RLE map of free bytes */
- __uint16_t base; /* base of free region */
- __uint16_t size; /* length of free region */
- } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
- } hdr;
- struct xfs_attr_leaf_entry { /* sorted on key, not name */
- xfs_dahash_t hashval; /* hash value of name */
- __uint16_t nameidx; /* index into buffer of name/value */
- __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
- __uint8_t pad2; /* unused pad byte */
- } entries[1]; /* variable sized array */
- struct xfs_attr_leaf_name_local {
- __uint16_t valuelen; /* number of bytes in value */
- __uint8_t namelen; /* length of name bytes */
- __uint8_t nameval[1]; /* name/value bytes */
- } namelist; /* grows from bottom of buf */
- struct xfs_attr_leaf_name_remote {
- xfs_dablk_t valueblk; /* block number of value bytes */
- __uint32_t valuelen; /* number of bytes in value */
- __uint8_t namelen; /* length of name bytes */
- __uint8_t name[1]; /* name bytes */
- } valuelist; /* grows from bottom of buf */
+ xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
+ xfs_attr_leaf_entry_t entries[1]; /* variable sized array, sorted on key, not name */
+ xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
+ xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
} xfs_attr_leafblock_t;
-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
/*
* Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
}
-#define XFS_ATTR_LEAF_NAME(leafp,idx) xfs_attr_leaf_name(leafp,idx)
+#define XFS_ATTR_LEAF_NAME(leafp,idx) \
+ xfs_attr_leaf_name(leafp,idx)
static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
{
return (&((char *)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9..70625e577c7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
return 0; /* keep gcc quite */
}
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ * Returns 0 on success (including the no-adjustment early exits, which leave *offp and *lenp untouched) or XFS_ERROR(EINVAL) when a realtime request cannot be covered. */
+STATIC int
+xfs_bmap_extsize_align(
+ xfs_mount_t *mp, /* mount; supplies sb_rextsize for the rt trim */
+ xfs_bmbt_irec_t *gotp, /* next extent pointer */
+ xfs_bmbt_irec_t *prevp, /* previous extent pointer */
+ xfs_extlen_t extsz, /* align to this extent size */
+ int rt, /* is this a realtime inode? */
+ int eof, /* is extent at end-of-file? */
+ int delay, /* creating delalloc extent? */
+ int convert, /* overwriting unwritten extent? */
+ xfs_fileoff_t *offp, /* in/out: aligned offset */
+ xfs_extlen_t *lenp) /* in/out: aligned length */
+{
+ xfs_fileoff_t orig_off; /* original offset */
+ xfs_extlen_t orig_alen; /* original length */
+ xfs_fileoff_t orig_end; /* original off+len */
+ xfs_fileoff_t nexto; /* next file offset */
+ xfs_fileoff_t prevo; /* previous file offset */
+ xfs_fileoff_t align_off; /* temp for offset */
+ xfs_extlen_t align_alen; /* temp for length */
+ xfs_extlen_t temp; /* temp for calculations */
+
+ if (convert) /* unwritten extent conversion: perform no alignment */
+ return 0;
+
+ orig_off = align_off = *offp;
+ orig_alen = align_alen = *lenp;
+ orig_end = orig_off + orig_alen;
+
+ /*
+ * If this request overlaps an existing extent, then don't
+ * attempt to perform any additional alignment.
+ */
+ if (!delay && !eof &&
+ (orig_off >= gotp->br_startoff) &&
+ (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+ return 0;
+ }
+
+ /*
+ * If the file offset is unaligned vs. the extent size
+ * we need to align it. This will be possible unless
+ * the file was previously written with a kernel that didn't
+ * perform this alignment, or if a truncate shot us in the
+ * foot.
+ */
+ temp = do_mod(orig_off, extsz); /* how far the start sits past an extsz boundary */
+ if (temp) {
+ align_alen += temp;
+ align_off -= temp;
+ }
+ /*
+ * Same adjustment for the end of the requested area.
+ */
+ if ((temp = (align_alen % extsz))) {
+ align_alen += extsz - temp; /* round up; NOTE(review): the grown length is not capped in this function -- confirm callers bound it */
+ }
+ /*
+ * If the previous block overlaps with this proposed allocation
+ * then move the start forward without adjusting the length.
+ */
+ if (prevp->br_startoff != NULLFILEOFF) {
+ if (prevp->br_startblock == HOLESTARTBLOCK)
+ prevo = prevp->br_startoff;
+ else
+ prevo = prevp->br_startoff + prevp->br_blockcount;
+ } else
+ prevo = 0; /* no previous extent: lower limit is offset 0 */
+ if (align_off != orig_off && align_off < prevo)
+ align_off = prevo;
+ /*
+ * If the next block overlaps with this proposed allocation
+ * then move the start back without adjusting the length,
+ * but not before offset 0.
+ * This may of course make the start overlap previous block,
+ * and if we hit the offset 0 limit then the next block
+ * can still overlap too.
+ */
+ if (!eof && gotp->br_startoff != NULLFILEOFF) {
+ if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+ (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+ nexto = gotp->br_startoff + gotp->br_blockcount;
+ else
+ nexto = gotp->br_startoff;
+ } else
+ nexto = NULLFILEOFF; /* no next extent limits the end */
+ if (!eof &&
+ align_off + align_alen != orig_end &&
+ align_off + align_alen > nexto)
+ align_off = nexto > align_alen ? nexto - align_alen : 0; /* clamp at offset 0 */
+ /*
+ * If we're now overlapping the next or previous extent that
+ * means we can't fit an extsz piece in this hole. Just move
+ * the start forward to the first valid spot and set
+ * the length so we hit the end.
+ */
+ if (align_off != orig_off && align_off < prevo)
+ align_off = prevo;
+ if (align_off + align_alen != orig_end &&
+ align_off + align_alen > nexto &&
+ nexto != NULLFILEOFF) {
+ ASSERT(nexto > prevo);
+ align_alen = nexto - align_off;
+ }
+
+ /*
+ * If realtime, and the result isn't a multiple of the realtime
+ * extent size we need to remove blocks until it is.
+ */
+ if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+ /*
+ * We're not covering the original request, or
+ * we won't be able to once we fix the length.
+ */
+ if (orig_off < align_off ||
+ orig_end > align_off + align_alen ||
+ align_alen - temp < orig_alen)
+ return XFS_ERROR(EINVAL);
+ /*
+ * Try to fix it by moving the start up.
+ */
+ if (align_off + temp <= orig_off) {
+ align_alen -= temp;
+ align_off += temp;
+ }
+ /*
+ * Try to fix it by moving the end in.
+ */
+ else if (align_off + align_alen - temp >= orig_end)
+ align_alen -= temp;
+ /*
+ * Set the start to the minimum then trim the length.
+ */
+ else {
+ align_alen -= orig_off - align_off;
+ align_off = orig_off;
+ align_alen -= align_alen % mp->m_sb.sb_rextsize;
+ }
+ /*
+ * Result doesn't cover the request, fail it.
+ */
+ if (orig_off < align_off || orig_end > align_off + align_alen)
+ return XFS_ERROR(EINVAL);
+ } else {
+ ASSERT(orig_off >= align_off);
+ ASSERT(orig_end <= align_off + align_alen);
+ }
+
+#ifdef DEBUG /* sanity checks: the result must not overlap the next or previous extent */
+ if (!eof && gotp->br_startoff != NULLFILEOFF)
+ ASSERT(align_off + align_alen <= gotp->br_startoff);
+ if (prevp->br_startoff != NULLFILEOFF)
+ ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+
+ *lenp = align_alen; /* outputs are written only on this success path */
+ *offp = align_off;
+ return 0;
+}
+
#define XFS_ALLOC_GAP_UNITS 4
/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent.
*/
-STATIC int /* error */
+STATIC int
xfs_bmap_alloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
{
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
xfs_mount_t *mp; /* mount point structure */
int nullfb; /* true if ap->firstblock isn't set */
int rt; /* true if inode is realtime */
-#ifdef __KERNEL__
- xfs_extlen_t prod=0; /* product factor for allocators */
- xfs_extlen_t ralen=0; /* realtime allocation length */
-#endif
+ xfs_extlen_t prod = 0; /* product factor for allocators */
+ xfs_extlen_t ralen = 0; /* realtime allocation length */
+ xfs_extlen_t align; /* minimum allocation alignment */
+ xfs_rtblock_t rtx;
#define ISVALID(x,y) \
(rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
nullfb = ap->firstblock == NULLFSBLOCK;
rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
if (rt) {
- xfs_extlen_t extsz; /* file extent size for rt */
- xfs_fileoff_t nexto; /* next file offset */
- xfs_extlen_t orig_alen; /* original ap->alen */
- xfs_fileoff_t orig_end; /* original off+len */
- xfs_fileoff_t orig_off; /* original ap->off */
- xfs_extlen_t mod_off; /* modulus calculations */
- xfs_fileoff_t prevo; /* previous file offset */
- xfs_rtblock_t rtx; /* realtime extent number */
- xfs_extlen_t temp; /* temp for rt calculations */
-
- /*
- * Set prod to match the realtime extent size.
- */
- if (!(extsz = ap->ip->i_d.di_extsize))
- extsz = mp->m_sb.sb_rextsize;
- prod = extsz / mp->m_sb.sb_rextsize;
- orig_off = ap->off;
- orig_alen = ap->alen;
- orig_end = orig_off + orig_alen;
- /*
- * If the file offset is unaligned vs. the extent size
- * we need to align it. This will be possible unless
- * the file was previously written with a kernel that didn't
- * perform this alignment.
- */
- mod_off = do_mod(orig_off, extsz);
- if (mod_off) {
- ap->alen += mod_off;
- ap->off -= mod_off;
- }
- /*
- * Same adjustment for the end of the requested area.
- */
- if ((temp = (ap->alen % extsz)))
- ap->alen += extsz - temp;
- /*
- * If the previous block overlaps with this proposed allocation
- * then move the start forward without adjusting the length.
- */
- prevo =
- ap->prevp->br_startoff == NULLFILEOFF ?
- 0 :
- (ap->prevp->br_startoff +
- ap->prevp->br_blockcount);
- if (ap->off != orig_off && ap->off < prevo)
- ap->off = prevo;
- /*
- * If the next block overlaps with this proposed allocation
- * then move the start back without adjusting the length,
- * but not before offset 0.
- * This may of course make the start overlap previous block,
- * and if we hit the offset 0 limit then the next block
- * can still overlap too.
- */
- nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
- NULLFILEOFF : ap->gotp->br_startoff;
- if (!ap->eof &&
- ap->off + ap->alen != orig_end &&
- ap->off + ap->alen > nexto)
- ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
- /*
- * If we're now overlapping the next or previous extent that
- * means we can't fit an extsz piece in this hole. Just move
- * the start forward to the first valid spot and set
- * the length so we hit the end.
- */
- if ((ap->off != orig_off && ap->off < prevo) ||
- (ap->off + ap->alen != orig_end &&
- ap->off + ap->alen > nexto)) {
- ap->off = prevo;
- ap->alen = nexto - prevo;
- }
- /*
- * If the result isn't a multiple of rtextents we need to
- * remove blocks until it is.
- */
- if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
- /*
- * We're not covering the original request, or
- * we won't be able to once we fix the length.
- */
- if (orig_off < ap->off ||
- orig_end > ap->off + ap->alen ||
- ap->alen - temp < orig_alen)
- return XFS_ERROR(EINVAL);
- /*
- * Try to fix it by moving the start up.
- */
- if (ap->off + temp <= orig_off) {
- ap->alen -= temp;
- ap->off += temp;
- }
- /*
- * Try to fix it by moving the end in.
- */
- else if (ap->off + ap->alen - temp >= orig_end)
- ap->alen -= temp;
- /*
- * Set the start to the minimum then trim the length.
- */
- else {
- ap->alen -= orig_off - ap->off;
- ap->off = orig_off;
- ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
- }
- /*
- * Result doesn't cover the request, fail it.
- */
- if (orig_off < ap->off || orig_end > ap->off + ap->alen)
- return XFS_ERROR(EINVAL);
- }
+ align = ap->ip->i_d.di_extsize ?
+ ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+ /* Set prod to match the extent size */
+ prod = align / mp->m_sb.sb_rextsize;
+
+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ align, rt, ap->eof, 0,
+ ap->conv, &ap->off, &ap->alen);
+ if (error)
+ return error;
+ ASSERT(ap->alen);
ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+
/*
* If the offset & length are not perfectly aligned
* then kill prod, it will just get us in trouble.
*/
- if (do_mod(ap->off, extsz) || ap->alen % extsz)
+ if (do_mod(ap->off, align) || ap->alen % align)
prod = 1;
/*
* Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
ap->rval = rtx * mp->m_sb.sb_rextsize;
} else
ap->rval = 0;
+ } else {
+ align = (ap->userdata && ap->ip->i_d.di_extsize &&
+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+ ap->ip->i_d.di_extsize : 0;
+ if (unlikely(align)) {
+ error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ align, rt,
+ ap->eof, 0, ap->conv,
+ &ap->off, &ap->alen);
+ ASSERT(!error);
+ ASSERT(ap->alen);
+ }
+ if (nullfb)
+ ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+ else
+ ap->rval = ap->firstblock;
}
-#else
- if (rt)
- ap->rval = 0;
-#endif /* __KERNEL__ */
- else if (nullfb)
- ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
- else
- ap->rval = ap->firstblock;
+
/*
* If allocating at eof, and there's a previous real block,
* try to use its last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
args.total = ap->total;
args.minlen = ap->minlen;
}
- if (ap->ip->i_d.di_extsize) {
+ if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+ (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
args.prod = ap->ip->i_d.di_extsize;
if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
- } else if (mp->m_sb.sb_blocksize >= NBPP) {
+ } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
args.prod = 1;
args.mod = 0;
} else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
lastxp, gotp, prevp);
- rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;
- if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {
+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
"start_block : %llx start_off : %llx blkcnt : %llx "
"extent-state : %x \n",
- (ip->i_mount)->m_fsname,(long long)ip->i_ino,
- gotp->br_startblock, gotp->br_startoff,
- gotp->br_blockcount,gotp->br_state);
+ (ip->i_mount)->m_fsname, (long long)ip->i_ino,
+ (unsigned long long)gotp->br_startblock,
+ (unsigned long long)gotp->br_startoff,
+ (unsigned long long)gotp->br_blockcount,
+ gotp->br_state);
}
return ep;
}
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
if (!ip->i_d.di_forkoff)
ip->i_d.di_forkoff = mp->m_attroffset >> 3;
- else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+ else if (mp->m_flags & XFS_MOUNT_ATTR2)
version = 2;
break;
default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
*/
if (whichfork == XFS_DATA_FORK) {
maxleafents = MAXEXTNUM;
- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
- mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+ XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
} else {
maxleafents = MAXAEXTNUM;
- sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
- mp->m_sb.sb_inodesize - mp->m_attroffset :
- XFS_BMDR_SPACE_CALC(MINABTPTRS);
+ sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+ XFS_BMDR_SPACE_CALC(MINABTPTRS) :
+ mp->m_sb.sb_inodesize - mp->m_attroffset;
}
maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
num_recs = be16_to_cpu(block->bb_numrecs);
if (unlikely(i + num_recs > room)) {
ASSERT(i + num_recs <= room);
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt dinode %Lu, (btree extents).",
(unsigned long long) ip->i_ino);
XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
char contig; /* allocation must be one extent */
char delay; /* this request is for delayed alloc */
char exact; /* don't do all of wasdelayed extent */
+ char convert; /* unwritten extent I/O completion */
xfs_bmbt_rec_t *ep; /* extent list entry pointer */
int error; /* error return */
xfs_bmbt_irec_t got; /* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
}
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- rt = XFS_IS_REALTIME_INODE(ip);
+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_ext_max ==
XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
delay = (flags & XFS_BMAPI_DELAY) != 0;
trim = (flags & XFS_BMAPI_ENTIRE) == 0;
userdata = (flags & XFS_BMAPI_METADATA) == 0;
+ convert = (flags & XFS_BMAPI_CONVERT) != 0;
exact = (flags & XFS_BMAPI_EXACT) != 0;
rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
}
minlen = contig ? alen : 1;
if (delay) {
- xfs_extlen_t extsz = 0;
+ xfs_extlen_t extsz;
/* Figure out the extent size, adjust alen */
if (rt) {
if (!(extsz = ip->i_d.di_extsize))
extsz = mp->m_sb.sb_rextsize;
- alen = roundup(alen, extsz);
- extsz = alen / mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
}
+ if (extsz) {
+ error = xfs_bmap_extsize_align(mp,
+ &got, &prev, extsz,
+ rt, eof, delay, convert,
+ &aoff, &alen);
+ ASSERT(!error);
+ }
+
+ if (rt)
+ extsz = alen / mp->m_sb.sb_rextsize;
/*
* Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
- if (rt)
+ if (rt) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FREXTENTS,
-(extsz), rsvd);
- else
+ } else {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
-(alen), rsvd);
+ }
if (!error) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
-(indlen), rsvd);
- if (error && rt) {
- xfs_mod_incore_sb(ip->i_mount,
+ if (error && rt)
+ xfs_mod_incore_sb(mp,
XFS_SBS_FREXTENTS,
extsz, rsvd);
- } else if (error) {
- xfs_mod_incore_sb(ip->i_mount,
+ else if (error)
+ xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
alen, rsvd);
- }
}
if (error) {
- if (XFS_IS_QUOTA_ON(ip->i_mount))
+ if (XFS_IS_QUOTA_ON(mp))
/* unreserve the blocks now */
+ (void)
XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
mp, NULL, ip,
(long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
bma.firstblock = *firstblock;
bma.alen = alen;
bma.off = aoff;
+ bma.conv = convert;
bma.wasdel = wasdelay;
bma.minlen = minlen;
bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
return 0;
}
XFS_STATS_INC(xs_blk_unmap);
- isrt = (whichfork == XFS_DATA_FORK) &&
- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
start = bno;
bno = start + len - 1;
ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
}
if (wasdel) {
ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
- /* Update realtim/data freespace, unreserve quota */
+ /* Update realtime/data freespace, unreserve quota */
if (isrt) {
xfs_filblks_t rtexts;
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
(int)rtexts, rsvd);
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
- -((long)del.br_blockcount), 0,
+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+ NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
(int)del.br_blockcount, rsvd);
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
- -((long)del.br_blockcount), 0,
+ (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+ NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
return XFS_ERROR(EINVAL);
if (whichfork == XFS_DATA_FORK) {
- if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+ if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+ (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+ ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
prealloced = 1;
fixlen = XFS_MAXIOFFSET(mp);
} else {
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a0130..12cc63dfc2c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef struct xfs_bmap_free
#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
/* combine contig. space */
#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */
+/* XFS_BMAPI_DIRECT_IO 0x800 */
+#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
+ /* need write cache flushing and no */
+ /* additional allocation alignments */
#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w)
static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
char wasdel; /* replacing a delayed allocation */
char userdata;/* set if is user data */
char low; /* low on space, using seq'l ags */
- char aeof; /* allocated space at eof */
+ char aeof; /* allocated space at eof */
+ char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 328a528b926..f57cc9ac875 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -57,7 +57,7 @@ struct xfs_mount_args {
/*
* XFS mount option flags -- args->flags1
*/
-#define XFSMNT_COMPAT_ATTR 0x00000001 /* do not use ATTR2 format */
+#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
* compatible */
#define XFSMNT_INO64 0x00000004 /* move inode numbers up
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 070259a4254..c6191d00ad2 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -60,8 +60,6 @@ xfs_swapext(
xfs_bstat_t *sbp;
struct file *fp = NULL, *tfp = NULL;
vnode_t *vp, *tvp;
- bhv_desc_t *bdp, *tbdp;
- vn_bhv_head_t *bhp, *tbhp;
static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
int ilf_fields, tilf_fields;
int error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
goto error0;
}
- bhp = VN_BHV_HEAD(vp);
- bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
- if (bdp == NULL) {
+ ip = xfs_vtoi(vp);
+ if (ip == NULL) {
error = XFS_ERROR(EBADF);
goto error0;
- } else {
- ip = XFS_BHVTOI(bdp);
}
if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
goto error0;
}
- tbhp = VN_BHV_HEAD(tvp);
- tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
- if (tbdp == NULL) {
+ tip = xfs_vtoi(tvp);
+ if (tip == NULL) {
error = XFS_ERROR(EBADF);
goto error0;
- } else {
- tip = XFS_BHVTOI(tbdp);
}
if (ip->i_mount != tip->i_mount) {
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1..79d0d9e1fba 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
#define XFS_DFORK_DSIZE(dip,mp) \
XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_DSIZE_HOST(dip,mp) \
+ XFS_CFORK_DSIZE(&(dip)->di_core, mp)
#define XFS_DFORK_ASIZE(dip,mp) \
XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_ASIZE_HOST(dip,mp) \
+ XFS_CFORK_ASIZE(&(dip)->di_core, mp)
#define XFS_DFORK_SIZE(dip,mp,w) \
XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
+#define XFS_DFORK_SIZE_HOST(dip,mp,w) \
+ XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core)
#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
#define XFS_CFORK_FMT_SET(dcp,w,n) \
((w) == XFS_DATA_FORK ? \
((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
+#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \
((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
INT_GET((dcp)->di_anextents, ARCH_CONVERT))
#define XFS_CFORK_NEXTENTS(dcp,w) \
((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
+#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w) /* extent count read through INT_GET(..., ARCH_CONVERT) */
+#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w) /* extent count read directly from the di_core fields, no conversion */
#define XFS_CFORK_NEXT_SET(dcp,w,n) \
((w) == XFS_DATA_FORK ? \
((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
-#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
-
#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
/*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
+#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
+#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT)
#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT)
#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
#define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
- XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+ XFS_DIFLAG_EXTSZINHERIT)
#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 3dd30391f55..bb87d2a700a 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
uint shortcount, leafcount, count;
mp->m_dirversion = 1;
- if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) {
+ if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
shortcount = (mp->m_attroffset -
(uint)sizeof(xfs_dir_sf_hdr_t)) /
(uint)sizeof(xfs_dir_sf_entry_t);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 488defe86ba..8cc8afb9f6c 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -135,6 +135,8 @@ void xfs_dir_startup(void); /* called exactly once */
((mp)->m_dirops.xd_shortform_to_single(args))
#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1) /* nonzero when the mount's m_dirversion is 1 */
+#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2) /* nonzero when the mount's m_dirversion is 2 */
extern xfs_dirops_t xfsv1_dirops;
+extern xfs_dirops_t xfsv2_dirops;
#endif /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7e24ffeda9e..3158f5dc431 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
struct uio *uio; /* uio control structure */
} xfs_dir2_put_args_t;
-#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
-extern xfs_dirops_t xfsv2_dirops;
-
/*
* Other interfaces used by the rest of the dir v2 code.
*/
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index ab6b09eef9a..eb8cd9a4667 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -67,34 +67,38 @@ struct xfs_trans;
*/
#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */
+typedef struct xfs_dir_leaf_map { /* RLE map entry: one run of free bytes */
+ __uint16_t base; /* base (start) of the free region */
+ __uint16_t size; /* run length of the free region */
+} xfs_dir_leaf_map_t;
+
+typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */
+ xfs_da_blkinfo_t info; /* block type, links, etc. */
+ __uint16_t count; /* number of active leaf entries */
+ __uint16_t namebytes; /* num bytes of name strings stored */
+ __uint16_t firstused; /* first used byte in name area */
+ __uint8_t holes; /* != 0 if blk needs compaction */
+ __uint8_t pad1; /* pad byte */
+ xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
+} xfs_dir_leaf_hdr_t;
+
+typedef struct xfs_dir_leaf_entry { /* one per name; sorted on key (hashval), not name */
+ xfs_dahash_t hashval; /* hash value of name */
+ __uint16_t nameidx; /* index into buffer of name */
+ __uint8_t namelen; /* length of name string */
+ __uint8_t pad2; /* pad byte */
+} xfs_dir_leaf_entry_t;
+
+typedef struct xfs_dir_leaf_name { /* inode number plus name bytes for one entry */
+ xfs_dir_ino_t inumber; /* inode number for this key */
+ __uint8_t name[1]; /* name string itself; declared [1], sized per name by xfs_dir_leaf_entsize_byname() */
+} xfs_dir_leaf_name_t;
+
typedef struct xfs_dir_leafblock {
- struct xfs_dir_leaf_hdr { /* constant-structure header block */
- xfs_da_blkinfo_t info; /* block type, links, etc. */
- __uint16_t count; /* count of active leaf_entry's */
- __uint16_t namebytes; /* num bytes of name strings stored */
- __uint16_t firstused; /* first used byte in name area */
- __uint8_t holes; /* != 0 if blk needs compaction */
- __uint8_t pad1;
- struct xfs_dir_leaf_map {/* RLE map of free bytes */
- __uint16_t base; /* base of free region */
- __uint16_t size; /* run length of free region */
- } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
- } hdr;
- struct xfs_dir_leaf_entry { /* sorted on key, not name */
- xfs_dahash_t hashval; /* hash value of name */
- __uint16_t nameidx; /* index into buffer of name */
- __uint8_t namelen; /* length of name string */
- __uint8_t pad2;
- } entries[1]; /* var sized array */
- struct xfs_dir_leaf_name {
- xfs_dir_ino_t inumber; /* inode number for this key */
- __uint8_t name[1]; /* name string itself */
- } namelist[1]; /* grows from bottom of buf */
+ xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */
+ xfs_dir_leaf_entry_t entries[1]; /* var sized array, sorted on key, not name */
+ xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */
} xfs_dir_leafblock_t;
-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
/*
* Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \
((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
-typedef struct xfs_dir_put_args
-{
+typedef struct xfs_dir_put_args {
xfs_dircook_t cook; /* cookie of (next) entry */
xfs_intino_t ino; /* inode number */
- struct xfs_dirent *dbp; /* buffer pointer */
+ struct xfs_dirent *dbp; /* buffer pointer */
char *name; /* directory entry name */
int namelen; /* length of name */
int done; /* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
struct uio *uio; /* uio control structure */
} xfs_dir_put_args_t;
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len)
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \
+ xfs_dir_leaf_entsize_byname(len)
static inline int xfs_dir_leaf_entsize_byname(int len)
{
return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index d7b6b5d1670..2a21c502401 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -54,7 +54,6 @@ xfs_error_trap(int e)
if (e != xfs_etrap[i])
continue;
cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
- debug_stop_all_cpus((void *)-1LL);
BUG();
break;
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 06d8a8426c1..26b8e709a56 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,9 +18,6 @@
#ifndef __XFS_ERROR_H__
#define __XFS_ERROR_H__
-#define prdev(fmt,targ,args...) \
- printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-
#define XFS_ERECOVER 1 /* Failure to recover log */
#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
#define XFS_ENOLOGSPACE 3 /* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
struct xfs_mount;
/* PRINTFLIKE4 */
extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
- char *fmt, ...);
+ char *fmt, ...);
/* PRINTFLIKE3 */
extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
+/*
+ * Same as xfs_fs_cmn_err(), with " Unmount and run xfs_repair." appended
+ * to the message.  The suffix is attached by adjacent string-literal
+ * concatenation, so fmt has to be a string literal, not a pointer.
+ */
+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
+ xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
+
#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48..14010f1fa82 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -3,15 +3,15 @@
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * GNU Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
+ * You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -65,6 +65,8 @@ struct fsxattr {
#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d1236d6f404..163031c1e39 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -540,6 +540,32 @@ xfs_reserve_blocks(
return(0);
}
+/*
+ * Commit a synchronous transaction that does nothing except log the
+ * core of the root inode.  If the log reservation cannot be obtained
+ * the transaction is cancelled and the function returns silently.
+ */
+void
+xfs_fs_log_dummy(xfs_mount_t *mp)
+{
+	xfs_inode_t	*rootip;
+	xfs_trans_t	*trans;
+
+	trans = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+	atomic_inc(&mp->m_active_trans);
+
+	if (xfs_trans_reserve(trans, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0) != 0) {
+		xfs_trans_cancel(trans, 0);
+		return;
+	}
+
+	/* Lock the root inode and attach it to the transaction. */
+	rootip = mp->m_rootip;
+	xfs_ilock(rootip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(trans, rootip, XFS_ILOCK_EXCL);
+
+	/* Held across the commit, so the unlock below is done by hand. */
+	xfs_trans_ihold(trans, rootip);
+
+	xfs_trans_log_inode(trans, rootip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(trans);
+	xfs_trans_commit(trans, 0, NULL);
+
+	xfs_iunlock(rootip, XFS_ILOCK_EXCL);
+}
+
int
xfs_fs_goingdown(
xfs_mount_t *mp,
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f32713f14f9..300d0c9d61a 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern void xfs_fs_log_dummy(xfs_mount_t *mp);
#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fc19eedbd11..8e380a1fb79 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -493,7 +493,6 @@ xfs_iget(
retry:
if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
- bhv_desc_t *bdp;
xfs_inode_t *ip;
vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
* to wait for the inode to go away.
*/
if (is_bad_inode(inode) ||
- ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
- &xfs_vnodeops)) == NULL)) {
+ ((ip = xfs_vtoi(vp)) == NULL)) {
iput(inode);
delay(1);
goto retry;
}
- ip = XFS_BHVTOI(bdp);
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
XFS_STATS_INC(xs_ig_found);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70..1d7f5a7e063 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -404,9 +404,8 @@ xfs_iformat(
INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt dinode %Lu, extent total = %d, nblocks = %Lu."
- " Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
(unsigned long long)ip->i_ino,
(int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
+ INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
}
if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt dinode %Lu, forkoff = 0x%x."
- " Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt dinode %Lu, forkoff = 0x%x.",
(unsigned long long)ip->i_ino,
(int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
* no local regular files yet
*/
if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt inode %Lu "
+ "(local format for regular file).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat(4)",
XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt inode %Lu (bad size %Ld for local inode). Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt inode %Lu "
+ "(bad size %Ld for local inode).",
(unsigned long long) ip->i_ino,
(long long) di_size);
XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
* kmem_alloc() or memcpy() below.
*/
if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt inode %Lu "
+ "(bad size %d for local fork, size = %d).",
(unsigned long long) ip->i_ino, size,
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
* kmem_alloc() or memcpy() below.
*/
if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt inode %Lu ((a)extents = %d).",
(unsigned long long) ip->i_ino, nex);
XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
|| XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
|| XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
- xfs_fs_cmn_err(CE_WARN, ip->i_mount,
- "corrupt inode %Lu (btree). Unmount and run xfs_repair.",
+ xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+ "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
flags |= XFS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
flags |= XFS_XFLAG_NOSYMLINKS;
+ if (di_flags & XFS_DIFLAG_EXTSIZE)
+ flags |= XFS_XFLAG_EXTSIZE;
+ if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+ flags |= XFS_XFLAG_EXTSZINHERIT;
}
return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
if ((mode & S_IFMT) == S_IFDIR) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- } else {
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
+ } else if ((mode & S_IFMT) == S_IFREG) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
di_flags |= XFS_DIFLAG_REALTIME;
ip->i_iocore.io_flags |= XFS_IOCORE_RT;
}
+ if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_d.di_extsize = pip->i_d.di_extsize;
+ }
}
if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
xfs_inherit_noatime)
@@ -1262,7 +1275,7 @@ xfs_isize_check(
if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
return;
- if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+ if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
return;
nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
xfs_fsize_t new_size,
cred_t *credp)
{
- xfs_fsize_t isize;
int error;
ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
ASSERT(new_size > ip->i_d.di_size);
- error = 0;
- isize = ip->i_d.di_size;
/*
* Zero any pages that may have been created by
* xfs_write_file() beyond the end of the file
* and any blocks between the old and new file sizes.
*/
- error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
- new_size);
+ error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
+ ip->i_d.di_size, new_size);
return error;
}
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
ip->i_update_core = 0;
SYNCHRONIZE();
+ /*
+ * Make sure to get the latest atime from the Linux inode.
+ */
+ xfs_synchronize_atime(ip);
+
if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 124d30e6143..1cfbcf18ce8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -436,6 +436,10 @@ void xfs_ichgtime(xfs_inode_t *, int);
xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+xfs_inode_t *xfs_vtoi(struct vnode *vp);
+
+void xfs_synchronize_atime(xfs_inode_t *);
+
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
#ifdef DEBUG
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7f3363c621e..36aa1fcb90a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -271,6 +271,11 @@ xfs_inode_item_format(
if (ip->i_update_size)
ip->i_update_size = 0;
+ /*
+ * Make sure to get the latest atime from the Linux inode.
+ */
+ xfs_synchronize_atime(ip);
+
vecp->i_addr = (xfs_caddr_t)&ip->i_d;
vecp->i_len = sizeof(xfs_dinode_core_t);
XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
if (iip->ili_pushbuf_flag == 0) {
iip->ili_pushbuf_flag = 1;
#ifdef DEBUG
- iip->ili_push_owner = get_thread_id();
+ iip->ili_push_owner = current_pid();
#endif
/*
* Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
* trying to duplicate our effort.
*/
ASSERT(iip->ili_pushbuf_flag != 0);
- ASSERT(iip->ili_push_owner == get_thread_id());
+ ASSERT(iip->ili_push_owner == current_pid());
/*
* If flushlock isn't locked anymore, chances are that the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ca7afc83a89..788917f355c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -262,7 +262,7 @@ phase2:
case BMAPI_WRITE:
/* If we found an extent, return it */
if (nimaps &&
- (imap.br_startblock != HOLESTARTBLOCK) &&
+ (imap.br_startblock != HOLESTARTBLOCK) &&
(imap.br_startblock != DELAYSTARTBLOCK)) {
xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
offset, count, iomapp, &imap, flags);
@@ -317,6 +317,58 @@ out:
}
+/*
+ * Round *last_fsb up to the allocation alignment that applies to this
+ * file when allocating past EOF.
+ *
+ * The alignment is the stripe width (mounted with "-o swalloc") or the
+ * stripe unit, for non-realtime files whose size is at least that large,
+ * rounded up to a multiple of extsize when extsize is non-zero.  With no
+ * stripe alignment it is extsize alone.  *last_fsb is replaced by the
+ * rounded value only if XFS_BMAP_EOF() reports that block as being at eof.
+ *
+ * Returns 0 on success or the error from XFS_BMAP_EOF().
+ */
STATIC int
+xfs_iomap_eof_align_last_fsb(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_extlen_t	extsize,
+	xfs_fileoff_t	*last_fsb)
+{
+	xfs_fileoff_t	new_last_fsb;
+	xfs_extlen_t	align = 0;
+	int		eof, error;
+
+	if (!(io->io_flags & XFS_IOCORE_RT)) {
+		/*
+		 * If mounted with the "-o swalloc" option, align to a
+		 * stripe width boundary if the file size is >= stripe
+		 * width; otherwise align to a stripe unit (m_dalign)
+		 * boundary if the file size is >= stripe unit size.
+		 */
+		if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+		    (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+			align = mp->m_swidth;
+		else if (mp->m_dalign &&
+			 (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+			align = mp->m_dalign;
+	}
+
+	/*
+	 * Always round up the allocation request to an extent boundary
+	 * (when file on a real-time subvolume or has di_extsize hint).
+	 *
+	 * The multiple is built from the alignment values themselves, never
+	 * from an already-rounded file offset: a file offset can need more
+	 * than 32 bits and would be truncated when stored in the 32-bit
+	 * xfs_extlen_t, yielding a bogus (possibly zero) rounding multiple.
+	 */
+	if (extsize) {
+		if (align)
+			align = roundup_64(align, extsize);
+		else
+			align = extsize;
+	}
+
+	if (align) {
+		new_last_fsb = roundup_64(*last_fsb, align);
+		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+		if (error)
+			return error;
+		if (eof)
+			*last_fsb = new_last_fsb;
+	}
+	return 0;
+}
+
+STATIC int
xfs_flush_space(
xfs_inode_t *ip,
int *fsynced,
@@ -362,19 +414,20 @@ xfs_iomap_write_direct(
xfs_iocore_t *io = &ip->i_iocore;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t last_fsb;
- xfs_filblks_t count_fsb;
+ xfs_filblks_t count_fsb, resaligned;
xfs_fsblock_t firstfsb;
+ xfs_extlen_t extsz, temp;
+ xfs_fsize_t isize;
int nimaps;
- int error;
int bmapi_flag;
int quota_flag;
int rt;
xfs_trans_t *tp;
xfs_bmbt_irec_t imap;
xfs_bmap_free_t free_list;
- xfs_filblks_t qblocks, resblks;
+ uint qblocks, resblks, resrtextents;
int committed;
- int resrtextents;
+ int error;
/*
* Make sure that the dquots are there. This doesn't hold
@@ -384,37 +437,52 @@ xfs_iomap_write_direct(
if (error)
return XFS_ERROR(error);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- count_fsb = last_fsb - offset_fsb;
- if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
- xfs_fileoff_t map_last_fsb;
-
- map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
- if (map_last_fsb < last_fsb) {
- last_fsb = map_last_fsb;
- count_fsb = last_fsb - offset_fsb;
- }
- ASSERT(count_fsb > 0);
+ rt = XFS_IS_REALTIME_INODE(ip);
+ if (unlikely(rt)) {
+ if (!(extsz = ip->i_d.di_extsize))
+ extsz = mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
}
- /*
- * Determine if reserving space on the data or realtime partition.
- */
- if ((rt = XFS_IS_REALTIME_INODE(ip))) {
- xfs_extlen_t extsz;
+ isize = ip->i_d.di_size;
+ if (io->io_new_size > isize)
+ isize = io->io_new_size;
- if (!(extsz = ip->i_d.di_extsize))
- extsz = mp->m_sb.sb_rextsize;
- resrtextents = qblocks = (count_fsb + extsz - 1);
- do_div(resrtextents, mp->m_sb.sb_rextsize);
- resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
- quota_flag = XFS_QMOPT_RES_RTBLKS;
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+ if ((offset + count) > isize) {
+ error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+ &last_fsb);
+ if (error)
+ goto error_out;
} else {
- resrtextents = 0;
- resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
- quota_flag = XFS_QMOPT_RES_REGBLKS;
+ if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+ last_fsb = MIN(last_fsb, (xfs_fileoff_t)
+ ret_imap->br_blockcount +
+ ret_imap->br_startoff);
}
+ count_fsb = last_fsb - offset_fsb;
+ ASSERT(count_fsb > 0);
+
+ resaligned = count_fsb;
+ if (unlikely(extsz)) {
+ if ((temp = do_mod(offset_fsb, extsz)))
+ resaligned += temp;
+ if ((temp = do_mod(resaligned, extsz)))
+ resaligned += extsz - temp;
+ }
+
+ if (unlikely(rt)) {
+ resrtextents = qblocks = resaligned;
+ resrtextents /= mp->m_sb.sb_rextsize;
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ quota_flag = XFS_QMOPT_RES_RTBLKS;
+ } else {
+ resrtextents = 0;
+ resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+ quota_flag = XFS_QMOPT_RES_REGBLKS;
+ }
/*
* Allocate and setup the transaction
@@ -425,7 +493,6 @@ xfs_iomap_write_direct(
XFS_WRITE_LOG_RES(mp), resrtextents,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
-
/*
* Check for running out of space, note: need lock to return
*/
@@ -435,20 +502,20 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
- if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
- error = (EDQUOT);
+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+ qblocks, 0, quota_flag);
+ if (error)
goto error1;
- }
- bmapi_flag = XFS_BMAPI_WRITE;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
- if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+ bmapi_flag = XFS_BMAPI_WRITE;
+ if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
- * Issue the bmapi() call to allocate the blocks
+ * Issue the xfs_bmapi() call to allocate the blocks
*/
XFS_BMAP_INIT(&free_list, &firstfsb);
nimaps = 1;
@@ -483,8 +550,10 @@ xfs_iomap_write_direct(
"extent-state : %x \n",
(ip->i_mount)->m_fsname,
(long long)ip->i_ino,
- ret_imap->br_startblock, ret_imap->br_startoff,
- ret_imap->br_blockcount,ret_imap->br_state);
+ (unsigned long long)ret_imap->br_startblock,
+ (unsigned long long)ret_imap->br_startoff,
+ (unsigned long long)ret_imap->br_blockcount,
+ ret_imap->br_state);
}
return 0;
@@ -500,6 +569,63 @@ error_out:
return XFS_ERROR(error);
}
+/*
+ * Decide whether a write should be followed by speculative allocation
+ * beyond it, reporting the answer through *prealloc (1 = yes, 0 = no).
+ *
+ * A write that ends past the end of the file normally has its
+ * allocation extended out to the file system's write iosize; any
+ * extra space left over is cleaned up when the file is closed in
+ * xfs_inactive().  Sync writes never preallocate: they are used to
+ * flush delayed allocate space and free up room near the filesystem
+ * full boundary, where preallocation would only hurt.
+ *
+ * Preallocation is also refused if any real blocks (neither a hole nor
+ * a delayed allocation) are mapped from the last block of the write
+ * onwards.  The caller's imap array, nimaps entries long, is used as
+ * scratch space for the lookups.
+ *
+ * Returns 0, or the error from XFS_BMAPI().
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_off_t	offset,
+	size_t		count,
+	int		ioflag,
+	xfs_bmbt_irec_t *imap,
+	int		nimaps,
+	int		*prealloc)
+{
+	xfs_fileoff_t	next_fsb;
+	xfs_filblks_t	remaining;
+	xfs_fsblock_t	firstblock;
+	int		i, found, error;
+
+	*prealloc = 0;
+	if (ioflag & BMAPI_SYNC)
+		return 0;
+	if ((offset + count) <= isize)
+		return 0;
+
+	/* Walk every mapping from the last written block onwards. */
+	next_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+	remaining = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	while (remaining > 0) {
+		found = nimaps;
+		firstblock = NULLFSBLOCK;
+		error = XFS_BMAPI(mp, NULL, io, next_fsb, remaining,
+				  0, &firstblock, 0, imap, &found, NULL);
+		if (error)
+			return error;
+
+		for (i = 0; i < found; i++) {
+			/* A real extent past eof: no speculative allocation. */
+			if (imap[i].br_startblock != HOLESTARTBLOCK &&
+			    imap[i].br_startblock != DELAYSTARTBLOCK)
+				return 0;
+
+			next_fsb += imap[i].br_blockcount;
+			remaining -= imap[i].br_blockcount;
+		}
+	}
+
+	*prealloc = 1;
+	return 0;
+}
+
int
xfs_iomap_write_delay(
xfs_inode_t *ip,
@@ -513,13 +639,15 @@ xfs_iomap_write_delay(
xfs_iocore_t *io = &ip->i_iocore;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t last_fsb;
- xfs_fsize_t isize;
+ xfs_off_t aligned_offset;
+ xfs_fileoff_t ioalign;
xfs_fsblock_t firstblock;
+ xfs_extlen_t extsz;
+ xfs_fsize_t isize;
int nimaps;
- int error;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
- int aeof;
- int fsynced = 0;
+ int prealloc, fsynced = 0;
+ int error;
ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
@@ -527,152 +655,57 @@ xfs_iomap_write_delay(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
-
error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
if (error)
return XFS_ERROR(error);
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ if (!(extsz = ip->i_d.di_extsize))
+ extsz = mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
+ }
+
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
retry:
isize = ip->i_d.di_size;
- if (io->io_new_size > isize) {
+ if (io->io_new_size > isize)
isize = io->io_new_size;
- }
- aeof = 0;
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- /*
- * If the caller is doing a write at the end of the file,
- * then extend the allocation (and the buffer used for the write)
- * out to the file system's write iosize. We clean up any extra
- * space left over when the file is closed in xfs_inactive().
- *
- * For sync writes, we are flushing delayed allocate space to
- * try to make additional space available for allocation near
- * the filesystem full boundary - preallocation hurts in that
- * situation, of course.
- */
- if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
- xfs_off_t aligned_offset;
- xfs_filblks_t count_fsb;
- unsigned int iosize;
- xfs_fileoff_t ioalign;
- int n;
- xfs_fileoff_t start_fsb;
+ error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+ ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+ if (error)
+ return error;
- /*
- * If there are any real blocks past eof, then don't
- * do any speculative allocation.
- */
- start_fsb = XFS_B_TO_FSBT(mp,
- ((xfs_ufsize_t)(offset + count - 1)));
- count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- while (count_fsb > 0) {
- nimaps = XFS_WRITE_IMAPS;
- error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
- 0, &firstblock, 0, imap, &nimaps, NULL);
- if (error) {
- return error;
- }
- for (n = 0; n < nimaps; n++) {
- if ( !(io->io_flags & XFS_IOCORE_RT) &&
- !imap[n].br_startblock) {
- cmn_err(CE_PANIC,"Access to block "
- "zero: fs <%s> inode: %lld "
- "start_block : %llx start_off "
- ": %llx blkcnt : %llx "
- "extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- imap[n].br_startblock,
- imap[n].br_startoff,
- imap[n].br_blockcount,
- imap[n].br_state);
- }
- if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
- (imap[n].br_startblock != DELAYSTARTBLOCK)) {
- goto write_map;
- }
- start_fsb += imap[n].br_blockcount;
- count_fsb -= imap[n].br_blockcount;
- }
- }
- iosize = mp->m_writeio_blocks;
+ if (prealloc) {
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + iosize;
- aeof = 1;
+ last_fsb = ioalign + mp->m_writeio_blocks;
+ } else {
+ last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
-write_map:
- nimaps = XFS_WRITE_IMAPS;
- firstblock = NULLFSBLOCK;
- /*
- * If mounted with the "-o swalloc" option, roundup the allocation
- * request to a stripe width boundary if the file size is >=
- * stripe width and we are allocating past the allocation eof.
- */
- if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth
- && (mp->m_flags & XFS_MOUNT_SWALLOC)
- && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
- int eof;
- xfs_fileoff_t new_last_fsb;
-
- new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
- if (error) {
- return error;
- }
- if (eof) {
- last_fsb = new_last_fsb;
- }
- /*
- * Roundup the allocation request to a stripe unit (m_dalign) boundary
- * if the file size is >= stripe unit size, and we are allocating past
- * the allocation eof.
- */
- } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
- (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
- int eof;
- xfs_fileoff_t new_last_fsb;
- new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
- if (error) {
- return error;
- }
- if (eof) {
- last_fsb = new_last_fsb;
- }
- /*
- * Round up the allocation request to a real-time extent boundary
- * if the file is on the real-time subvolume.
- */
- } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
- int eof;
- xfs_fileoff_t new_last_fsb;
-
- new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
- error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
- if (error) {
+ if (prealloc || extsz) {
+ error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+ &last_fsb);
+ if (error)
return error;
- }
- if (eof)
- last_fsb = new_last_fsb;
}
+
+ nimaps = XFS_WRITE_IMAPS;
+ firstblock = NULLFSBLOCK;
error = xfs_bmapi(NULL, ip, offset_fsb,
(xfs_filblks_t)(last_fsb - offset_fsb),
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
&nimaps, NULL);
- /*
- * This can be EDQUOT, if nimaps == 0
- */
- if (error && (error != ENOSPC)) {
+ if (error && (error != ENOSPC))
return XFS_ERROR(error);
- }
+
/*
* If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space.
+ * then we must have run out of space - flush delalloc, and retry..
*/
if (nimaps == 0) {
xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -684,17 +717,21 @@ write_map:
goto retry;
}
- *ret_imap = imap[0];
- *nmaps = 1;
- if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
+	/*
+	 * *ret_imap is only filled in after this check, so examine the
+	 * mapping xfs_bmapi() just returned rather than the caller's buffer.
+	 */
+	if (!(io->io_flags & XFS_IOCORE_RT) && !imap[0].br_startblock) {
cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
"start_block : %llx start_off : %llx blkcnt : %llx "
"extent-state : %x \n",
(ip->i_mount)->m_fsname,
(long long)ip->i_ino,
- ret_imap->br_startblock, ret_imap->br_startoff,
- ret_imap->br_blockcount,ret_imap->br_state);
+			(unsigned long long)imap[0].br_startblock,
+			(unsigned long long)imap[0].br_startoff,
+			(unsigned long long)imap[0].br_blockcount,
+			imap[0].br_state);
}
+
+ *ret_imap = imap[0];
+ *nmaps = 1;
+
return 0;
}
@@ -820,17 +857,21 @@ xfs_iomap_write_allocate(
*/
for (i = 0; i < nimaps; i++) {
- if ( !(io->io_flags & XFS_IOCORE_RT) &&
- !imap[i].br_startblock) {
+ if (!(io->io_flags & XFS_IOCORE_RT) &&
+ !imap[i].br_startblock) {
cmn_err(CE_PANIC,"Access to block zero: "
"fs <%s> inode: %lld "
- "start_block : %llx start_off : %llx "
+ "start_block : %llx start_off : %llx "
"blkcnt : %llx extent-state : %x \n",
(ip->i_mount)->m_fsname,
(long long)ip->i_ino,
- imap[i].br_startblock,
- imap[i].br_startoff,
- imap[i].br_blockcount,imap[i].br_state);
+ (unsigned long long)
+ imap[i].br_startblock,
+ (unsigned long long)
+ imap[i].br_startoff,
+ (unsigned long long)
+ imap[i].br_blockcount,
+ imap[i].br_state);
}
if ((offset_fsb >= imap[i].br_startoff) &&
(offset_fsb < (imap[i].br_startoff +
@@ -867,17 +908,17 @@ xfs_iomap_write_unwritten(
{
xfs_mount_t *mp = ip->i_mount;
xfs_iocore_t *io = &ip->i_iocore;
- xfs_trans_t *tp;
xfs_fileoff_t offset_fsb;
xfs_filblks_t count_fsb;
xfs_filblks_t numblks_fsb;
- xfs_bmbt_irec_t imap;
+ xfs_fsblock_t firstfsb;
+ int nimaps;
+ xfs_trans_t *tp;
+ xfs_bmbt_irec_t imap;
+ xfs_bmap_free_t free_list;
+ uint resblks;
int committed;
int error;
- int nres;
- int nimaps;
- xfs_fsblock_t firstfsb;
- xfs_bmap_free_t free_list;
xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
&ip->i_iocore, offset, count);
@@ -886,9 +927,9 @@ xfs_iomap_write_unwritten(
count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
- do {
- nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
+ do {
/*
* set up a transaction to convert the range of extents
* from unwritten to real. Do allocations in a loop until
@@ -896,7 +937,7 @@ xfs_iomap_write_unwritten(
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
- error = xfs_trans_reserve(tp, nres,
+ error = xfs_trans_reserve(tp, resblks,
XFS_WRITE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
@@ -915,7 +956,7 @@ xfs_iomap_write_unwritten(
XFS_BMAP_INIT(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_WRITE, &firstfsb,
+ XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
1, &imap, &nimaps, &free_list);
if (error)
goto error_on_bmapi_transaction;
@@ -929,15 +970,17 @@ xfs_iomap_write_unwritten(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
goto error0;
-
+
if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {
cmn_err(CE_PANIC,"Access to block zero: fs <%s> "
"inode: %lld start_block : %llx start_off : "
"%llx blkcnt : %llx extent-state : %x \n",
(ip->i_mount)->m_fsname,
(long long)ip->i_ino,
- imap.br_startblock,imap.br_startoff,
- imap.br_blockcount,imap.br_state);
+ (unsigned long long)imap.br_startblock,
+ (unsigned long long)imap.br_startoff,
+ (unsigned long long)imap.br_blockcount,
+ imap.br_state);
}
if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f63646ead81..c59450e1be4 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
{
xfs_dinode_core_t *dic; /* dinode core info pointer */
xfs_inode_t *ip; /* incore inode pointer */
+ vnode_t *vp;
int error;
error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
goto out_iput;
}
+ vp = XFS_ITOV(ip);
dic = &ip->i_d;
/* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
- buf->bs_atime.tv_sec = dic->di_atime.t_sec;
- buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
+ vn_atime_to_bstime(vp, &buf->bs_atime);
buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 29af51275ca..3d9a36e7736 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
#define xlog_trace_iclog(iclog,state)
#endif /* XFS_LOG_TRACE */
+
+/*
+ * Add a ticket to the circular, doubly linked ticket queue whose head
+ * pointer is *qp, and mark the ticket as queued.  On a non-empty queue
+ * the ticket is linked in immediately before the head.
+ */
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	struct xlog_ticket	*head = *qp;
+
+	if (head == NULL) {
+		/* Empty queue: the ticket points at itself and becomes head. */
+		tic->t_next = tic;
+		tic->t_prev = tic;
+		*qp = tic;
+	} else {
+		struct xlog_ticket	*tail = head->t_prev;
+
+		tic->t_next = head;
+		tic->t_prev = tail;
+		tail->t_next = tic;
+		head->t_prev = tic;
+	}
+
+	tic->t_flags |= XLOG_TIC_IN_Q;
+}
+
+/*
+ * Unlink a ticket from the circular ticket queue headed by *qp and
+ * clear its queued flag.  If the ticket was the only element the queue
+ * becomes empty; otherwise the head pointer is moved to the ticket's
+ * successor, whether or not the ticket was the head.
+ */
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	struct xlog_ticket	*next = tic->t_next;
+
+	if (next != tic) {
+		*qp = next;
+		next->t_prev = tic->t_prev;
+		tic->t_prev->t_next = next;
+	} else {
+		*qp = NULL;
+	}
+
+	tic->t_prev = NULL;
+	tic->t_next = NULL;
+	tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+
+/*
+ * Move both grant heads (write and reserve) back by 'bytes'.  A head
+ * whose byte count drops below zero is wrapped by adding the log size
+ * and stepping its cycle number back by one.
+ */
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+	/* write grant head */
+	log->l_grant_write_bytes = log->l_grant_write_bytes - bytes;
+	if (log->l_grant_write_bytes < 0) {
+		log->l_grant_write_cycle--;
+		log->l_grant_write_bytes += log->l_logsize;
+	}
+
+	/* reserve grant head */
+	log->l_grant_reserve_bytes = log->l_grant_reserve_bytes - bytes;
+	if (log->l_grant_reserve_bytes < 0) {
+		log->l_grant_reserve_cycle--;
+		log->l_grant_reserve_bytes += log->l_logsize;
+	}
+}
+
+/*
+ * Advance the write grant head by 'bytes'.  Once the byte count
+ * exceeds the log size it is wrapped by subtracting the log size and
+ * the write cycle number is bumped.
+ */
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes = log->l_grant_write_bytes + bytes;
+	if (log->l_grant_write_bytes <= log->l_logsize)
+		return;
+
+	log->l_grant_write_cycle++;
+	log->l_grant_write_bytes -= log->l_logsize;
+}
+
+/*
+ * Advance the reserve grant head by 'bytes'.  Once the byte count
+ * exceeds the log size it is wrapped by subtracting the log size and
+ * the reserve cycle number is bumped.
+ */
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+	log->l_grant_reserve_bytes = log->l_grant_reserve_bytes + bytes;
+	if (log->l_grant_reserve_bytes <= log->l_logsize)
+		return;
+
+	log->l_grant_reserve_cycle++;
+	log->l_grant_reserve_bytes -= log->l_logsize;
+}
+
+/*
+ * Advance both grant heads by 'bytes': the write head first, then the
+ * reserve head.
+ */
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+	/* write head */
+	xlog_grant_add_space_write(log, bytes);
+
+	/* reserve head */
+	xlog_grant_add_space_reserve(log, bytes);
+}
+
+
/*
* NOTES:
*
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t *mp,
if (readonly)
vfsp->vfs_flag &= ~VFS_RDONLY;
- error = xlog_recover(mp->m_log, readonly);
+ error = xlog_recover(mp->m_log);
if (readonly)
vfsp->vfs_flag |= VFS_RDONLY;
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t *log,
/* move grant heads by roundoff in sync */
s = GRANT_LOCK(log);
- XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
- XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
+ xlog_grant_add_space(log, roundoff);
GRANT_UNLOCK(log, s);
/* put cycle number in every block */
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t *log,
* print out info relating to regions written which consume
* the reservation
*/
-#if defined(XFS_LOG_RES_DEBUG)
STATIC void
xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
{
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
ticket->t_res_arr_sum, ticket->t_res_o_flow,
ticket->t_res_num_ophdrs, ophdr_spc,
ticket->t_res_arr_sum +
- ticket->t_res_o_flow + ophdr_spc,
+ ticket->t_res_o_flow + ophdr_spc,
ticket->t_res_num);
for (i = 0; i < ticket->t_res_num; i++) {
- uint r_type = ticket->t_res_arr[i].r_type;
+ uint r_type = ticket->t_res_arr[i].r_type;
cmn_err(CE_WARN,
"region[%u]: %s - %u bytes\n",
i,
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
ticket->t_res_arr[i].r_len);
}
}
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif
/*
* Write some region out to in-core log
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t *log,
/* something is already sleeping; insert new transaction at end */
if (log->l_reserve_headq) {
- XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+ xlog_ins_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 1");
/*
@@ -2422,7 +2494,7 @@ redo:
log->l_grant_reserve_bytes);
if (free_bytes < need_bytes) {
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+ xlog_ins_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
s = GRANT_LOCK(log);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
- XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+ xlog_del_ticketq(&log->l_reserve_headq, tic);
/* we've got enough space */
- XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
- XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
+ xlog_grant_add_space(log, need_bytes);
#ifdef DEBUG
tail_lsn = log->l_tail_lsn;
/*
@@ -2464,7 +2535,7 @@ redo:
error_return:
if (tic->t_flags & XLOG_TIC_IN_Q)
- XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+ xlog_del_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
/*
* If we are failing, make sure the ticket doesn't have any
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t *log,
if (ntic != log->l_write_headq) {
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- XLOG_INS_TICKETQ(log->l_write_headq, tic);
+ xlog_ins_ticketq(&log->l_write_headq, tic);
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
log->l_grant_write_bytes);
if (free_bytes < need_bytes) {
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- XLOG_INS_TICKETQ(log->l_write_headq, tic);
+ xlog_ins_ticketq(&log->l_write_headq, tic);
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
@@ -2581,9 +2652,10 @@ redo:
s = GRANT_LOCK(log);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
- XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+ xlog_del_ticketq(&log->l_write_headq, tic);
- XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+ /* we've got enough space */
+ xlog_grant_add_space_write(log, need_bytes);
#ifdef DEBUG
tail_lsn = log->l_tail_lsn;
if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2600,7 +2672,7 @@ redo:
error_return:
if (tic->t_flags & XLOG_TIC_IN_Q)
- XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+ xlog_del_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
/*
* If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
ticket->t_cnt--;
s = GRANT_LOCK(log);
- XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
- XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+ xlog_grant_sub_space(log, ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
XLOG_TIC_RESET_RES(ticket);
xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
return;
}
- XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+ xlog_grant_add_space_reserve(log, ticket->t_unit_res);
xlog_trace_loggrant(log, ticket,
"xlog_regrant_reserve_log_space: exit");
xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t *log,
s = GRANT_LOCK(log);
xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
- XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
- XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+ xlog_grant_sub_space(log, ticket->t_curr_res);
xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t *log,
*/
if (ticket->t_cnt > 0) {
ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
- XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
- XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
+ xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
}
xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index f40d4391fcf..4b2ac88dbb8 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -96,7 +96,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
/* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
#define XLOG_REG_TYPE_BFORMAT 1
#define XLOG_REG_TYPE_BCHUNK 2
#define XLOG_REG_TYPE_EFI_FORMAT 3
@@ -117,21 +116,13 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
#define XLOG_REG_TYPE_COMMIT 18
#define XLOG_REG_TYPE_TRANSHDR 19
#define XLOG_REG_TYPE_MAX 19
-#endif
-#if defined(XFS_LOG_RES_DEBUG)
#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
-
typedef struct xfs_log_iovec {
xfs_caddr_t i_addr; /* beginning address of region */
int i_len; /* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
- uint i_type; /* type of region */
-#endif
+ uint i_type; /* type of region */
} xfs_log_iovec_t;
typedef void* xfs_log_ticket_t;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4518b188ade..34bcbf50789 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
/* Ticket reservation region accounting */
-#if defined(XFS_LOG_RES_DEBUG)
#define XLOG_TIC_LEN_MAX 15
#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
(t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
* we don't care about.
*/
typedef struct xlog_res {
- uint r_len;
- uint r_type;
+ uint r_len; /* region length :4 */
+ uint r_type; /* region's transaction type :4 */
} xlog_res_t;
-#else
-#define XLOG_TIC_RESET_RES(t)
-#define XLOG_TIC_ADD_OPHDR(t)
-#define XLOG_TIC_ADD_REGION(t, len, type)
-#endif
-
typedef struct xlog_ticket {
sv_t t_sema; /* sleep on this semaphore : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
char t_flags; /* properties of reservation : 1 */
uint t_trans_type; /* transaction type : 4 */
-#if defined (XFS_LOG_RES_DEBUG)
/* reservation array fields */
uint t_res_num; /* num in array : 4 */
- xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */
uint t_res_num_ophdrs; /* num op hdrs : 4 */
uint t_res_arr_sum; /* array sum : 4 */
uint t_res_o_flow; /* sum overflow : 4 */
-#endif
+ xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
} xlog_ticket_t;
#endif
@@ -494,71 +485,13 @@ typedef struct log {
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type) \
- { \
- if (type == 'w') { \
- (log)->l_grant_write_bytes -= (bytes); \
- if ((log)->l_grant_write_bytes < 0) { \
- (log)->l_grant_write_bytes += (log)->l_logsize; \
- (log)->l_grant_write_cycle--; \
- } \
- } else { \
- (log)->l_grant_reserve_bytes -= (bytes); \
- if ((log)->l_grant_reserve_bytes < 0) { \
- (log)->l_grant_reserve_bytes += (log)->l_logsize;\
- (log)->l_grant_reserve_cycle--; \
- } \
- } \
- }
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type) \
- { \
- if (type == 'w') { \
- (log)->l_grant_write_bytes += (bytes); \
- if ((log)->l_grant_write_bytes > (log)->l_logsize) { \
- (log)->l_grant_write_bytes -= (log)->l_logsize; \
- (log)->l_grant_write_cycle++; \
- } \
- } else { \
- (log)->l_grant_reserve_bytes += (bytes); \
- if ((log)->l_grant_reserve_bytes > (log)->l_logsize) { \
- (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
- (log)->l_grant_reserve_cycle++; \
- } \
- } \
- }
-#define XLOG_INS_TICKETQ(q, tic) \
- { \
- if (q) { \
- (tic)->t_next = (q); \
- (tic)->t_prev = (q)->t_prev; \
- (q)->t_prev->t_next = (tic); \
- (q)->t_prev = (tic); \
- } else { \
- (tic)->t_prev = (tic)->t_next = (tic); \
- (q) = (tic); \
- } \
- (tic)->t_flags |= XLOG_TIC_IN_Q; \
- }
-#define XLOG_DEL_TICKETQ(q, tic) \
- { \
- if ((tic) == (tic)->t_next) { \
- (q) = NULL; \
- } else { \
- (q) = (tic)->t_next; \
- (tic)->t_next->t_prev = (tic)->t_prev; \
- (tic)->t_prev->t_next = (tic)->t_next; \
- } \
- (tic)->t_next = (tic)->t_prev = NULL; \
- (tic)->t_flags &= ~XLOG_TIC_IN_Q; \
- }
/* common routines */
extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
extern int xlog_find_tail(xlog_t *log,
xfs_daddr_t *head_blk,
- xfs_daddr_t *tail_blk,
- int readonly);
-extern int xlog_recover(xlog_t *log, int readonly);
+ xfs_daddr_t *tail_blk);
+extern int xlog_recover(xlog_t *log);
extern int xlog_recover_finish(xlog_t *log, int mfsi_flags);
extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
extern void xlog_recover_process_iunlinks(xlog_t *log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ab7df76806..7d46cbd6a07 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -783,8 +783,7 @@ int
xlog_find_tail(
xlog_t *log,
xfs_daddr_t *head_blk,
- xfs_daddr_t *tail_blk,
- int readonly)
+ xfs_daddr_t *tail_blk)
{
xlog_rec_header_t *rhead;
xlog_op_header_t *op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
/*
* The logitem format's flag tells us if this was user quotaoff,
- * group quotaoff or both.
+ * group/project quotaoff or both.
*/
if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
log->l_quotaoffs_flag |= XFS_DQ_USER;
+ if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+ log->l_quotaoffs_flag |= XFS_DQ_PROJ;
if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
log->l_quotaoffs_flag |= XFS_DQ_GROUP;
@@ -3890,14 +3891,13 @@ xlog_do_recover(
*/
int
xlog_recover(
- xlog_t *log,
- int readonly)
+ xlog_t *log)
{
xfs_daddr_t head_blk, tail_blk;
int error;
/* find the tail of the log */
- if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+ if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
return error;
if (tail_blk != head_blk) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 303af86739b..6088e14f84e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,7 @@ STATIC int xfs_uuid_mount(xfs_mount_t *);
STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void xfs_unmountfs_wait(xfs_mount_t *);
-static struct {
+static const struct {
short offset;
short type; /* 0 = integer
* 1 = binary / string (no translation)
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
xfs_iflush_all(mp);
- XFS_QM_DQPURGEALL(mp,
- XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
+ XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
/*
* Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3432fd5a398..cd3cf9613a0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -308,7 +308,6 @@ typedef struct xfs_mount {
xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
-#define m_dev m_ddev_targp->pbr_dev
__uint8_t m_dircook_elog; /* log d-cookie entry bits */
__uint8_t m_blkbit_log; /* blocklog + NBBY */
__uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
user */
#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment
allocations */
-#define XFS_MOUNT_COMPAT_ATTR (1ULL << 8) /* do not use attr2 format */
+#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
/* (1ULL << 9) -- currently unused */
#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4d4e8f4e768..81a05cfd77d 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -243,7 +243,6 @@ xfs_rename(
xfs_inode_t *inodes[4];
int target_ip_dropped = 0; /* dropped target_ip link? */
vnode_t *src_dir_vp;
- bhv_desc_t *target_dir_bdp;
int spaceres;
int target_link_zero = 0;
int num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
* Find the XFS behavior descriptor for the target directory
* vnode since it was not handed to us.
*/
- target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
- &xfs_vnodeops);
- if (target_dir_bdp == NULL) {
+ target_dp = xfs_vtoi(target_dir_vp);
+ if (target_dp == NULL) {
return XFS_ERROR(EXDEV);
}
src_dp = XFS_BHVTOI(src_dir_bdp);
- target_dp = XFS_BHVTOI(target_dir_bdp);
mp = src_dp->i_mount;
if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index c4b20872f07..a59c102cf21 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -238,6 +238,7 @@ xfs_bioerror_relse(
}
return (EIO);
}
+
/*
* Prints out an ALERT message about I/O error.
*/
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
"I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
" (\"%s\") error %d buf count %zd",
(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
- XFS_BUFTARG_NAME(bp->pb_target),
- (__uint64_t)blkno,
- func,
- XFS_BUF_GETERROR(bp),
- XFS_BUF_COUNT(bp));
+ XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+ (__uint64_t)blkno, func,
+ XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
}
/*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 4a17d335f89..bf168a91ddb 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -68,18 +68,6 @@ struct xfs_mount;
(XFS_SB_VERSION_NUMBITS | \
XFS_SB_VERSION_OKREALFBITS | \
XFS_SB_VERSION_OKSASHFBITS)
-#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits) \
- (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
- (morebits)) ? \
- (XFS_SB_VERSION_4 | \
- ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
- ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
- ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
- ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
- ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
- ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
- ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
- XFS_SB_VERSION_1)
/*
* There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
(XFS_SB_VERSION2_OKREALFBITS | \
XFS_SB_VERSION2_OKSASHFBITS )
-/*
- * mkfs macro to set up sb_features2 word
- */
-#define XFS_SB_VERSION2_MKFS(resvd1, sbcntr) 0
-
typedef struct xfs_sb
{
__uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 279e043d732..d3d714e6b32 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
xfs_log_item_t *lip;
int i;
#endif
+ xfs_mount_t *mp = tp->t_mountp;
/*
* See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
* filesystem. This happens in paths where we detect
* corruption and decide to give up.
*/
- if ((tp->t_flags & XFS_TRANS_DIRTY) &&
- !XFS_FORCED_SHUTDOWN(tp->t_mountp))
- xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+ if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+ XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
+ xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+ }
#ifdef DEBUG
if (!(flags & XFS_TRANS_ABORT)) {
licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
}
lip = lidp->lid_item;
- if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+ if (!XFS_FORCED_SHUTDOWN(mp))
ASSERT(!(lip->li_type == XFS_LI_EFD));
}
licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+ XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
if (tp->t_ticket) {
if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
} else {
log_flags = 0;
}
- xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+ xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
}
/* mark this thread as no longer being in a transaction */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a889963fdd1..d77901c07f6 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -973,7 +973,6 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
-void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fefe1d60377..34654ec6ae1 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -55,16 +55,13 @@ xfs_get_dir_entry(
xfs_inode_t **ipp)
{
vnode_t *vp;
- bhv_desc_t *bdp;
vp = VNAME_TO_VNODE(dentry);
- bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
- if (!bdp) {
- *ipp = NULL;
+
+ *ipp = xfs_vtoi(vp);
+ if (!*ipp)
return XFS_ERROR(ENOENT);
- }
VN_HOLD(vp);
- *ipp = XFS_BHVTOI(bdp);
return 0;
}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7bdbd991ab1..b6ad370fab3 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -53,6 +53,7 @@
#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_clnt.h"
+#include "xfs_fsops.h"
STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -290,8 +291,8 @@ xfs_start_flags(
mp->m_flags |= XFS_MOUNT_IDELETE;
if (ap->flags & XFSMNT_DIRSYNC)
mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (ap->flags & XFSMNT_COMPAT_ATTR)
- mp->m_flags |= XFS_MOUNT_COMPAT_ATTR;
+ if (ap->flags & XFSMNT_ATTR2)
+ mp->m_flags |= XFS_MOUNT_ATTR2;
if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
mp->m_flags |= XFS_MOUNT_NOUUID;
if (ap->flags & XFSMNT_BARRIER)
mp->m_flags |= XFS_MOUNT_BARRIER;
+ else
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
return 0;
}
@@ -330,10 +333,11 @@ xfs_finish_flags(
/* Fail a mount where the logbuf is smaller then the log stripe */
if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
- if ((ap->logbufsize == -1) &&
+ if ((ap->logbufsize <= 0) &&
(mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (ap->logbufsize < mp->m_sb.sb_logsunit) {
+ } else if (ap->logbufsize > 0 &&
+ ap->logbufsize < mp->m_sb.sb_logsunit) {
cmn_err(CE_WARN,
"XFS: logbuf size must be greater than or equal to log stripe size");
return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
}
}
+ if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ }
+
/*
* prohibit r/w mounts of read-only filesystems
*/
@@ -382,10 +390,6 @@ xfs_finish_flags(
return XFS_ERROR(EINVAL);
}
- if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
- mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
- }
-
return 0;
}
@@ -504,13 +508,13 @@ xfs_mount(
if (error)
goto error2;
+ if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
+ xfs_mountfs_check_barriers(mp);
+
error = XFS_IOINIT(vfsp, args, flags);
if (error)
goto error2;
- if ((args->flags & XFSMNT_BARRIER) &&
- !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
- xfs_mountfs_check_barriers(mp);
return 0;
error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
else
mp->m_flags &= ~XFS_MOUNT_NOATIME;
+ if (args->flags & XFSMNT_BARRIER)
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ else
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+
if ((vfsp->vfs_flag & VFS_RDONLY) &&
!(*flags & MS_RDONLY)) {
vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
* unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
int iosize;
args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
- args->flags |= XFSMNT_COMPAT_ATTR;
#if 0 /* XXX: off by default, until some remaining issues ironed out */
args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
args->flags |= XFSMNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
args->flags |= XFSMNT_BARRIER;
+ } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+ args->flags &= ~XFSMNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
- args->flags &= ~XFSMNT_COMPAT_ATTR;
+ args->flags |= XFSMNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
- args->flags |= XFSMNT_COMPAT_ATTR;
+ args->flags &= ~XFSMNT_ATTR2;
} else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
{ XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
{ XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
{ XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
- { XFS_MOUNT_BARRIER, "," MNTOPT_BARRIER },
{ XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP },
{ 0, NULL }
};
@@ -1914,33 +1924,28 @@ xfs_showargs(
if (mp->m_logbufs > 0)
seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-
if (mp->m_logbsize > 0)
seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
if (mp->m_logname)
seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-
if (mp->m_rtname)
seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
if (mp->m_dalign > 0)
seq_printf(m, "," MNTOPT_SUNIT "=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-
if (mp->m_swidth > 0)
seq_printf(m, "," MNTOPT_SWIDTH "=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
- if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
- seq_printf(m, "," MNTOPT_ATTR2);
-
if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
seq_printf(m, "," MNTOPT_LARGEIO);
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ seq_printf(m, "," MNTOPT_BARRIER);
if (!(vfsp->vfs_flag & VFS_32BITINODES))
seq_printf(m, "," MNTOPT_64BITINODE);
-
if (vfsp->vfs_flag & VFS_GRPID)
seq_printf(m, "," MNTOPT_GRPID);
@@ -1959,6 +1964,7 @@ xfs_freeze(
/* Push the superblock and write an unmount record */
xfs_log_unmount_write(mp);
xfs_unmountfs_writesb(mp);
+ xfs_fs_log_dummy(mp);
}
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e92cacde02f..8076cc981e1 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -185,8 +185,7 @@ xfs_getattr(
break;
}
- vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
- vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+ vn_atime_to_timespec(vp, &vap->va_atime);
vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -544,24 +543,6 @@ xfs_setattr(
}
/*
- * Can't set extent size unless the file is marked, or
- * about to be marked as a realtime file.
- *
- * This check will be removed when fixed size extents
- * with buffered data writes is implemented.
- *
- */
- if ((mask & XFS_AT_EXTSIZE) &&
- ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
- vap->va_extsize) &&
- (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
- ((mask & XFS_AT_XFLAGS) &&
- (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
-
- /*
* Can't change realtime flag if any extents are allocated.
*/
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -823,13 +804,17 @@ xfs_setattr(
di_flags |= XFS_DIFLAG_RTINHERIT;
if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
- } else {
+ if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
if (vap->va_xflags & XFS_XFLAG_REALTIME) {
di_flags |= XFS_DIFLAG_REALTIME;
ip->i_iocore.io_flags |= XFS_IOCORE_RT;
} else {
ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
}
+ if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
}
ip->i_d.di_flags = di_flags;
}
@@ -999,10 +984,6 @@ xfs_readlink(
goto error_return;
}
- if (!(ioflags & IO_INVIS)) {
- xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
- }
-
/*
* See if the symlink is stored inline.
*/
@@ -1234,7 +1215,8 @@ xfs_inactive_free_eofblocks(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!error && (nimaps != 0) &&
- (imap.br_startblock != HOLESTARTBLOCK)) {
+ (imap.br_startblock != HOLESTARTBLOCK ||
+ ip->i_delayed_blks)) {
/*
* Attach the dquots to the inode up front.
*/
@@ -1569,9 +1551,11 @@ xfs_release(
if (ip->i_d.di_nlink != 0) {
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+ ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+ ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
+ (!(ip->i_d.di_flags &
+ (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
if ((error = xfs_inactive_free_eofblocks(mp, ip)))
return (error);
/* Update linux inode block count after free above */
@@ -1628,7 +1612,8 @@ xfs_inactive(
* only one with a reference to the inode.
*/
truncate = ((ip->i_d.di_nlink == 0) &&
- ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
+ ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
+ (ip->i_delayed_blks > 0)) &&
((ip->i_d.di_mode & S_IFMT) == S_IFREG));
mp = ip->i_mount;
@@ -1646,10 +1631,12 @@ xfs_inactive(
if (ip->i_d.di_nlink != 0) {
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
- (ip->i_delayed_blks != 0))) {
+ ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+ ip->i_delayed_blks > 0)) &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS) &&
+ (!(ip->i_d.di_flags &
+ (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+ (ip->i_delayed_blks != 0)))) {
if ((error = xfs_inactive_free_eofblocks(mp, ip)))
return (VN_INACTIVE_CACHE);
/* Update linux inode block count after free above */
@@ -2593,7 +2580,6 @@ xfs_link(
int cancel_flags;
int committed;
vnode_t *target_dir_vp;
- bhv_desc_t *src_bdp;
int resblks;
char *target_name = VNAME(dentry);
int target_namelen;
@@ -2606,8 +2592,7 @@ xfs_link(
if (VN_ISDIR(src_vp))
return XFS_ERROR(EPERM);
- src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
- sip = XFS_BHVTOI(src_bdp);
+ sip = xfs_vtoi(src_vp);
tdp = XFS_BHVTOI(target_dir_bdp);
mp = tdp->i_mount;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -3240,7 +3225,6 @@ xfs_readdir(
xfs_trans_t *tp = NULL;
int error = 0;
uint lock_mode;
- xfs_off_t start_offset;
vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
(inst_t *)__return_address);
@@ -3251,11 +3235,7 @@ xfs_readdir(
}
lock_mode = xfs_ilock_map_shared(dp);
- start_offset = uiop->uio_offset;
error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
- if (start_offset != uiop->uio_offset) {
- xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
- }
xfs_iunlock_map_shared(dp, lock_mode);
return error;
}
@@ -3832,7 +3812,12 @@ xfs_reclaim(
vn_iowait(vp);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
- ASSERT(VN_CACHED(vp) == 0);
+
+ /*
+ * Make sure the atime in the XFS inode is correct before freeing the
+ * Linux inode.
+ */
+ xfs_synchronize_atime(ip);
/* If we have nothing to flush with this inode then complete the
* teardown now, otherwise break the link between the xfs inode
@@ -4002,42 +3987,36 @@ xfs_alloc_file_space(
int alloc_type,
int attr_flags)
{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_off_t count;
xfs_filblks_t allocated_fsb;
xfs_filblks_t allocatesize_fsb;
- int committed;
- xfs_off_t count;
- xfs_filblks_t datablocks;
- int error;
+ xfs_extlen_t extsz, temp;
+ xfs_fileoff_t startoffset_fsb;
xfs_fsblock_t firstfsb;
- xfs_bmap_free_t free_list;
- xfs_bmbt_irec_t *imapp;
- xfs_bmbt_irec_t imaps[1];
- xfs_mount_t *mp;
- int numrtextents;
- int reccount;
- uint resblks;
+ int nimaps;
+ int bmapi_flag;
+ int quota_flag;
int rt;
- int rtextsize;
- xfs_fileoff_t startoffset_fsb;
xfs_trans_t *tp;
- int xfs_bmapi_flags;
+ xfs_bmbt_irec_t imaps[1], *imapp;
+ xfs_bmap_free_t free_list;
+ uint qblocks, resblks, resrtextents;
+ int committed;
+ int error;
vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
- mp = ip->i_mount;
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- /*
- * determine if this is a realtime file
- */
- if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
- if (ip->i_d.di_extsize)
- rtextsize = ip->i_d.di_extsize;
- else
- rtextsize = mp->m_sb.sb_rextsize;
- } else
- rtextsize = 0;
+ rt = XFS_IS_REALTIME_INODE(ip);
+ if (unlikely(rt)) {
+ if (!(extsz = ip->i_d.di_extsize))
+ extsz = mp->m_sb.sb_rextsize;
+ } else {
+ extsz = ip->i_d.di_extsize;
+ }
if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
return error;
@@ -4048,8 +4027,8 @@ xfs_alloc_file_space(
count = len;
error = 0;
imapp = &imaps[0];
- reccount = 1;
- xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+ nimaps = 1;
+ bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -4070,43 +4049,51 @@ xfs_alloc_file_space(
}
/*
- * allocate file space until done or until there is an error
+ * Allocate file space until done or until there is an error
*/
retry:
while (allocatesize_fsb && !error) {
+ xfs_fileoff_t s, e;
+
/*
- * determine if reserving space on
- * the data or realtime partition.
+ * Determine space reservations for data/realtime.
*/
- if (rt) {
- xfs_fileoff_t s, e;
-
+ if (unlikely(extsz)) {
s = startoffset_fsb;
- do_div(s, rtextsize);
- s *= rtextsize;
- e = roundup_64(startoffset_fsb + allocatesize_fsb,
- rtextsize);
- numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
- datablocks = 0;
+ do_div(s, extsz);
+ s *= extsz;
+ e = startoffset_fsb + allocatesize_fsb;
+ if ((temp = do_mod(startoffset_fsb, extsz)))
+ e += temp;
+ if ((temp = do_mod(e, extsz)))
+ e += extsz - temp;
+ } else {
+ s = 0;
+ e = allocatesize_fsb;
+ }
+
+ if (unlikely(rt)) {
+ resrtextents = qblocks = (uint)(e - s);
+ resrtextents /= mp->m_sb.sb_rextsize;
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ quota_flag = XFS_QMOPT_RES_RTBLKS;
} else {
- datablocks = allocatesize_fsb;
- numrtextents = 0;
+ resrtextents = 0;
+ resblks = qblocks = \
+ XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+ quota_flag = XFS_QMOPT_RES_REGBLKS;
}
/*
- * allocate and setup the transaction
+ * Allocate and setup the transaction.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
- error = xfs_trans_reserve(tp,
- resblks,
- XFS_WRITE_LOG_RES(mp),
- numrtextents,
+ error = xfs_trans_reserve(tp, resblks,
+ XFS_WRITE_LOG_RES(mp), resrtextents,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
-
/*
- * check for running out of space
+ * Check for running out of space
*/
if (error) {
/*
@@ -4117,8 +4104,8 @@ retry:
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
- ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+ qblocks, 0, quota_flag);
if (error)
goto error1;
@@ -4126,19 +4113,19 @@ retry:
xfs_trans_ihold(tp, ip);
/*
- * issue the bmapi() call to allocate the blocks
+ * Issue the xfs_bmapi() call to allocate the blocks
*/
XFS_BMAP_INIT(&free_list, &firstfsb);
error = xfs_bmapi(tp, ip, startoffset_fsb,
- allocatesize_fsb, xfs_bmapi_flags,
- &firstfsb, 0, imapp, &reccount,
+ allocatesize_fsb, bmapi_flag,
+ &firstfsb, 0, imapp, &nimaps,
&free_list);
if (error) {
goto error0;
}
/*
- * complete the transaction
+ * Complete the transaction
*/
error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
if (error) {
@@ -4153,7 +4140,7 @@ retry:
allocated_fsb = imapp->br_blockcount;
- if (reccount == 0) {
+ if (nimaps == 0) {
error = XFS_ERROR(ENOSPC);
break;
}
@@ -4176,9 +4163,11 @@ dmapi_enospc_check:
return error;
- error0:
+error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- error1:
+ XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+
+error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto dmapi_enospc_check;
@@ -4423,8 +4412,8 @@ xfs_free_file_space(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
- ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
- XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ ip->i_udquot, ip->i_gdquot, resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
if (error)
goto error1;
diff --git a/mm/swap.c b/mm/swap.c
index ee6d71ccfa5..cbb48e721ab 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -384,6 +384,8 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
return pagevec_count(pvec);
}
+EXPORT_SYMBOL(pagevec_lookup);
+
unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
pgoff_t *index, int tag, unsigned nr_pages)
{