From 8ab22b9abb5c55413802e4adc9aa6223324547c3 Mon Sep 17 00:00:00 2001 From: Hisashi Hifumi Date: Mon, 28 Jul 2008 15:46:36 -0700 Subject: vfs: pagecache usage optimization for pagesize!=blocksize When we read some part of a file through pagecache, if there is a pagecache of corresponding index but this page is not uptodate, read IO is issued and this page will be uptodate. I think this is good for pagesize == blocksize environment but there is room for improvement on pagesize != blocksize environment. Because in this case a page can have multiple buffers and even if a page is not uptodate, some buffers can be uptodate. So I suggest that when all buffers which correspond to a part of a file that we want to read are uptodate, use this pagecache and copy data from this pagecache to user buffer even if a page is not uptodate. This can reduce read IO and improve system throughput. I wrote a benchmark program and got result number with this program. This benchmark do: 1: mount and open a test file. 2: create a 512MB file. 3: close a file and umount. 4: mount and again open a test file. 5: pwrite randomly 300000 times on a test file. offset is aligned by IO size(1024bytes). 6: measure time of preading randomly 100000 times on a test file. The result was: 2.6.26 330 sec 2.6.26-patched 226 sec Arch:i386 Filesystem:ext3 Blocksize:1024 bytes Memory: 1GB On ext3/4, a file is written through buffer/block. So random read/write mixed workloads or random read after random write workloads are optimized with this patch under pagesize != blocksize environment. This test result showed this. The benchmark program is as follows: #include #include #include #include #include #include #include #include #include #define LEN 1024 #define LOOP 1024*512 /* 512MB */ main(void) { unsigned long i, offset, filesize; int fd; char buf[LEN]; time_t t1, t2; if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) { perror("cannot mount\n"); exit(1); } memset(buf, 0, LEN); fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC); if (fd < 0) { perror("cannot open file\n"); exit(1); } for (i = 0; i < LOOP; i++) write(fd, buf, LEN); close(fd); if (umount("/root/test1/") < 0) { perror("cannot umount\n"); exit(1); } if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) { perror("cannot mount\n"); exit(1); } fd = open("/root/test1/testfile", O_RDWR); if (fd < 0) { perror("cannot open file\n"); exit(1); } filesize = LEN * LOOP; for (i = 0; i < 300000; i++){ offset = (random() % filesize) & (~(LEN - 1)); pwrite(fd, buf, LEN, offset); } printf("start test\n"); time(&t1); for (i = 0; i < 100000; i++){ offset = (random() % filesize) & (~(LEN - 1)); pread(fd, buf, LEN, offset); } time(&t2); printf("%ld sec\n", t2-t1); close(fd); if (umount("/root/test1/") < 0) { perror("cannot umount\n"); exit(1); } } Signed-off-by: Hisashi Hifumi Cc: Nick Piggin Cc: Christoph Hellwig Cc: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/buffer_head.h') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 82aa36c53ea..50cfe8ceb47 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); -- cgit v1.2.3-70-g09d2 From ca5de404ff036a29b25e9a83f6919c9f606c5841 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 2 Aug 2008 12:02:13 +0200 Subject: fs: rename buffer trylock Like the page lock change, this also requires name change, so convert the raw test_and_set bitop to a trylock. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- fs/jbd/commit.c | 2 +- fs/ntfs/aops.c | 2 +- fs/ntfs/compress.c | 2 +- fs/ntfs/mft.c | 4 ++-- fs/reiserfs/inode.c | 2 +- fs/reiserfs/journal.c | 4 ++-- fs/xfs/linux-2.6/xfs_aops.c | 2 +- include/linux/buffer_head.h | 8 ++++++-- 9 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 4dbe52948e8..38653e36e22 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { + } else if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } @@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); - else if (test_set_buffer_locked(bh)) + else if (!trylock_buffer(bh)) continue; if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 81a9ad7177c..ae08c057e75 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -221,7 +221,7 @@ write_out_data: * blocking lock_buffer(). */ if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 00e9ccde8e4..b38f944f066 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1194,7 +1194,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 33ff314cc50..9669541d011 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -665,7 +665,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 790defb847e..17d32ca6bc3 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 192269698a8..5699171212a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page, if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); } else { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ce2208b2711..c21df71943a 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock, jh = JH_ENTRY(list->next); bh = jh->bh; get_bh(bh); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!buffer_dirty(bh)) { list_move(&jh->list, &tmp); goto loop_next; @@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, { PROC_INFO_INC(p_s_sb, journal.prepare); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!wait) return 0; lock_buffer(bh); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index fa73179233a..fa47e43b8b4 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1104,7 +1104,7 @@ xfs_page_state_convert( * that we are writing into for the first time. */ type = IOMAP_NEW; - if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) all_bh = 1; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 50cfe8ceb47..eadaab44015 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -115,7 +115,6 @@ BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) -TAS_BUFFER_FNS(Lock, locked) BUFFER_FNS(Req, req) TAS_BUFFER_FNS(Req, req) BUFFER_FNS(Mapped, mapped) @@ -321,10 +320,15 @@ static inline void wait_on_buffer(struct buffer_head *bh) __wait_on_buffer(bh); } +static inline int trylock_buffer(struct buffer_head *bh) +{ + return likely(!test_and_set_bit(BH_Lock, &bh->b_state)); +} + static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); - if (test_set_buffer_locked(bh)) + if (!trylock_buffer(bh)) __lock_buffer(bh); } -- cgit v1.2.3-70-g09d2