diff options
Diffstat (limited to 'fs')
66 files changed, 1099 insertions, 1214 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index c2e7a7ff008..c63a3c8beb7 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -712,7 +712,6 @@ int afs_writeback_all(struct afs_vnode *vnode) .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_writepages = 1, .range_cyclic = 1, }; int ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 3581a4e5394..71e7e03ac34 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -420,7 +420,6 @@ static void bdev_destroy_inode(struct inode *inode) { struct bdev_inode *bdi = BDEV_I(inode); - bdi->bdev.bd_inode_backing_dev_info = NULL; kmem_cache_free(bdev_cachep, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 15831d5c736..8b819279001 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1600,6 +1600,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); + sb->s_bdi = &fs_info->bdi; /* * we set the i_size on the btree inode to the max possible int. diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682..7b2f401e604 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -740,7 +740,6 @@ int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, .nr_to_write = mapping->nrpages * 2, .range_start = start, .range_end = end, - .for_writepages = 1, }; return btrfs_writepages(mapping, &wbc); } diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 618a60f0388..240cef14fe5 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -106,6 +106,7 @@ struct connection { #define CF_CONNECT_PENDING 3 #define CF_INIT_PENDING 4 #define CF_IS_OTHERCON 5 +#define CF_CLOSE 6 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; int (*rx_action) (struct connection *); /* What to do when active */ @@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk) static inline void lowcomms_connect_sock(struct connection *con) { + if (test_bit(CF_CLOSE, &con->flags)) + return; if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) queue_work(send_workqueue, &con->swork); } @@ -926,10 +929,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; memset(&saddr, 0, sizeof(saddr)); - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) { - sock_release(sock); + if (dlm_nodeid_to_addr(con->nodeid, &saddr)) goto out_err; - } sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; @@ -1284,7 +1285,6 @@ out: static void send_to_sock(struct connection *con) { int ret = 0; - ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; struct writequeue_entry *e; int len, offset; @@ -1293,8 +1293,6 @@ static void send_to_sock(struct connection *con) if (con->sock == NULL) goto out_connect; - sendpage = con->sock->ops->sendpage; - spin_lock(&con->writequeue_lock); for (;;) { e = list_entry(con->writequeue.next, struct writequeue_entry, @@ -1309,8 +1307,8 @@ static void send_to_sock(struct connection *con) ret = 0; if (len) { - ret = sendpage(con->sock, e->page, offset, len, - msg_flags); + ret = kernel_sendpage(con->sock, e->page, offset, len, + msg_flags); if (ret == -EAGAIN || ret == 0) { cond_resched(); goto out; @@ -1370,6 +1368,13 @@ int dlm_lowcomms_close(int nodeid) log_print("closing connection to node %d", nodeid); con = nodeid2con(nodeid, 0); if (con) { + clear_bit(CF_CONNECT_PENDING, &con->flags); + clear_bit(CF_WRITE_PENDING, &con->flags); + set_bit(CF_CLOSE, &con->flags); + if (cancel_work_sync(&con->swork)) + log_print("canceled swork for node %d", nodeid); + if (cancel_work_sync(&con->rwork)) + log_print("canceled rwork for node %d", nodeid); clean_one_writequeue(con); close_connection(con, true); } @@ -1395,9 +1400,10 @@ static void process_send_sockets(struct work_struct *work) if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { con->connect_action(con); + set_bit(CF_WRITE_PENDING, &con->flags); } - clear_bit(CF_WRITE_PENDING, &con->flags); - send_to_sock(con); + if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) + send_to_sock(con); } diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d33634119e1..451d166bbe9 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -23,6 +23,7 @@ */ #include <linux/time.h> +#include <linux/blkdev.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/writeback.h> @@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) } if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; + goto flush; /* * The VFS has written the file data. If the inode is unaltered @@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) .nr_to_write = 0, /* sys_fsync did this */ }; ret = sync_inode(inode, &wbc); + goto out; } +flush: + /* + * In case we didn't commit a transaction, we have to flush + * disk caches manually so that data really is on persistent + * storage + */ + if (test_opt(inode->i_sb, BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); out: return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index b49908a167a..cd098a7b77f 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) * so before we call here everything must be consistently dirtied against * this transaction. */ -static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) +static int truncate_restart_transaction(handle_t *handle, struct inode *inode) { + int ret; + jbd_debug(2, "restarting handle %p\n", handle); - return ext3_journal_restart(handle, blocks_for_truncate(inode)); + /* + * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle + * At this moment, get_block can be called only for blocks inside + * i_size since page cache has been already dropped and writes are + * blocked by i_mutex. So we can safely drop the truncate_mutex. + */ + mutex_unlock(&EXT3_I(inode)->truncate_mutex); + ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); + mutex_lock(&EXT3_I(inode)->truncate_mutex); + return ret; } /* @@ -2072,7 +2083,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, ext3_journal_dirty_metadata(handle, bh); } ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + truncate_restart_transaction(handle, inode); if (bh) { BUFFER_TRACE(bh, "retaking write access"); ext3_journal_get_write_access(handle, bh); @@ -2282,7 +2293,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, return; if (try_to_extend_transaction(handle, inode)) { ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + truncate_restart_transaction(handle, inode); } ext3_free_blocks(handle, inode, nr, 1); @@ -2892,6 +2903,10 @@ static int ext3_do_update_inode(handle_t *handle, struct buffer_head *bh = iloc->bh; int err = 0, rc, block; +again: + /* we can't allow multiple procs in here at once, its a bit racey */ + lock_buffer(bh); + /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ if (ei->i_state & EXT3_STATE_NEW) @@ -2951,16 +2966,20 @@ static int ext3_do_update_inode(handle_t *handle, /* If this is the first large file * created, add a flag to the superblock. */ + unlock_buffer(bh); err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto out_brelse; + ext3_update_dynamic_rev(sb); EXT3_SET_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_LARGE_FILE); handle->h_sync = 1; err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + /* get our lock and start over */ + goto again; } } } @@ -2983,6 +3002,7 @@ static int ext3_do_update_inode(handle_t *handle, raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + unlock_buffer(bh); rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 628235cf44b..8e1e5e19d21 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -35,21 +35,29 @@ int nr_pdflush_threads; /* + * Passed into wb_writeback(), essentially a subset of writeback_control + */ +struct wb_writeback_args { + long nr_pages; + struct super_block *sb; + enum writeback_sync_modes sync_mode; + int for_kupdate; + int range_cyclic; +}; + +/* * Work items for the bdi_writeback threads */ struct bdi_work { - struct list_head list; - struct list_head wait_list; - struct rcu_head rcu_head; + struct list_head list; /* pending work list */ + struct rcu_head rcu_head; /* for RCU free/clear of work */ - unsigned long seen; - atomic_t pending; + unsigned long seen; /* threads that have seen this work */ + atomic_t pending; /* number of threads still to do work */ - struct super_block *sb; - unsigned long nr_pages; - enum writeback_sync_modes sync_mode; + struct wb_writeback_args args; /* writeback arguments */ - unsigned long state; + unsigned long state; /* flag bits, see WS_* */ }; enum { @@ -66,22 +74,13 @@ static inline bool bdi_work_on_stack(struct bdi_work *work) } static inline void bdi_work_init(struct bdi_work *work, - struct writeback_control *wbc) + struct wb_writeback_args *args) { INIT_RCU_HEAD(&work->rcu_head); - work->sb = wbc->sb; - work->nr_pages = wbc->nr_to_write; - work->sync_mode = wbc->sync_mode; + work->args = *args; work->state = WS_USED; } -static inline void bdi_work_init_on_stack(struct bdi_work *work, - struct writeback_control *wbc) -{ - bdi_work_init(work, wbc); - work->state |= WS_ONSTACK; -} - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -98,6 +97,11 @@ static void bdi_work_clear(struct bdi_work *work) { clear_bit(WS_USED_B, &work->state); smp_mb__after_clear_bit(); + /* + * work can have disappeared at this point. bit waitq functions + * should be able to tolerate this, provided bdi_sched_wait does + * not dereference it's pointer argument. + */ wake_up_bit(&work->state, WS_USED_B); } @@ -113,7 +117,8 @@ static void bdi_work_free(struct rcu_head *head) static void wb_work_complete(struct bdi_work *work) { - const enum writeback_sync_modes sync_mode = work->sync_mode; + const enum writeback_sync_modes sync_mode = work->args.sync_mode; + int onstack = bdi_work_on_stack(work); /* * For allocated work, we can clear the done/seen bit right here. @@ -121,9 +126,9 @@ static void wb_work_complete(struct bdi_work *work) * to after the RCU grace period, since the stack could be invalidated * as soon as bdi_work_clear() has done the wakeup. */ - if (!bdi_work_on_stack(work)) + if (!onstack) bdi_work_clear(work); - if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work)) + if (sync_mode == WB_SYNC_NONE || onstack) call_rcu(&work->rcu_head, bdi_work_free); } @@ -146,21 +151,19 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) { - if (work) { - work->seen = bdi->wb_mask; - BUG_ON(!work->seen); - atomic_set(&work->pending, bdi->wb_cnt); - BUG_ON(!bdi->wb_cnt); - - /* - * Make sure stores are seen before it appears on the list - */ - smp_mb(); + work->seen = bdi->wb_mask; + BUG_ON(!work->seen); + atomic_set(&work->pending, bdi->wb_cnt); + BUG_ON(!bdi->wb_cnt); - spin_lock(&bdi->wb_lock); - list_add_tail_rcu(&work->list, &bdi->work_list); - spin_unlock(&bdi->wb_lock); - } + /* + * list_add_tail_rcu() contains the necessary barriers to + * make sure the above stores are seen before the item is + * noticed on the list + */ + spin_lock(&bdi->wb_lock); + list_add_tail_rcu(&work->list, &bdi->work_list); + spin_unlock(&bdi->wb_lock); /* * If the default thread isn't there, make sure we add it. When @@ -171,15 +174,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) else { struct bdi_writeback *wb = &bdi->wb; - /* - * If we failed allocating the bdi work item, wake up the wb - * thread always. As a safety precaution, it'll flush out - * everything - */ - if (!wb_has_dirty_io(wb)) { - if (work) - wb_clear_pending(wb, work); - } else if (wb->task) + if (wb->task) wake_up_process(wb->task); } } @@ -194,48 +189,75 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) TASK_UNINTERRUPTIBLE); } -static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc) +static void bdi_alloc_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_args *args) { struct bdi_work *work; + /* + * This is WB_SYNC_NONE writeback, so if allocation fails just + * wakeup the thread for old dirty data writeback + */ work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) - bdi_work_init(work, wbc); + if (work) { + bdi_work_init(work, args); + bdi_queue_work(bdi, work); + } else { + struct bdi_writeback *wb = &bdi->wb; - return work; + if (wb->task) + wake_up_process(wb->task); + } } -void bdi_start_writeback(struct writeback_control *wbc) +/** + * bdi_sync_writeback - start and wait for writeback + * @bdi: the backing device to write from + * @sb: write inodes from this super_block + * + * Description: + * This does WB_SYNC_ALL data integrity writeback and waits for the + * IO to complete. Callers must hold the sb s_umount semaphore for + * reading, to avoid having the super disappear before we are done. + */ +static void bdi_sync_writeback(struct backing_dev_info *bdi, + struct super_block *sb) { - const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; - struct bdi_work work_stack, *work = NULL; + struct wb_writeback_args args = { + .sb = sb, + .sync_mode = WB_SYNC_ALL, + .nr_pages = LONG_MAX, + .range_cyclic = 0, + }; + struct bdi_work work; - if (!must_wait) - work = bdi_alloc_work(wbc); + bdi_work_init(&work, &args); + work.state |= WS_ONSTACK; - if (!work) { - work = &work_stack; - bdi_work_init_on_stack(work, wbc); - } + bdi_queue_work(bdi, &work); + bdi_wait_on_work_clear(&work); +} - bdi_queue_work(wbc->bdi, work); +/** + * bdi_start_writeback - start writeback + * @bdi: the backing device to write from + * @nr_pages: the number of pages to write + * + * Description: + * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * started when this function returns, we make no guarentees on + * completion. Caller need not hold sb s_umount semaphore. + * + */ +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) +{ + struct wb_writeback_args args = { + .sync_mode = WB_SYNC_NONE, + .nr_pages = nr_pages, + .range_cyclic = 1, + }; - /* - * If the sync mode is WB_SYNC_ALL, block waiting for the work to - * complete. If not, we only need to wait for the work to be started, - * if we allocated it on-stack. We use the same mechanism, if the - * wait bit is set in the bdi_work struct, then threads will not - * clear pending until after they are done. - * - * Note that work == &work_stack if must_wait is true, so we don't - * need to do call_rcu() here ever, since the completion path will - * have done that for us. - */ - if (must_wait || work == &work_stack) { - bdi_wait_on_work_clear(work); - if (work != &work_stack) - call_rcu(&work->rcu_head, bdi_work_free); - } + bdi_alloc_queue_work(bdi, &args); } /* @@ -671,17 +693,16 @@ static inline bool over_bground_thresh(void) * older_than_this takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ -static long wb_writeback(struct bdi_writeback *wb, long nr_pages, - struct super_block *sb, - enum writeback_sync_modes sync_mode, int for_kupdate) +static long wb_writeback(struct bdi_writeback *wb, + struct wb_writeback_args *args) { struct writeback_control wbc = { .bdi = wb->bdi, - .sb = sb, - .sync_mode = sync_mode, + .sb = args->sb, + .sync_mode = args->sync_mode, .older_than_this = NULL, - .for_kupdate = for_kupdate, - .range_cyclic = 1, + .for_kupdate = args->for_kupdate, + .range_cyclic = args->range_cyclic, }; unsigned long oldest_jif; long wrote = 0; @@ -691,13 +712,18 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } + if (!wbc.range_cyclic) { + wbc.range_start = 0; + wbc.range_end = LLONG_MAX; + } for (;;) { /* * Don't flush anything for non-integrity writeback where * no nr_pages was given */ - if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE) + if (!args->for_kupdate && args->nr_pages <= 0 && + args->sync_mode == WB_SYNC_NONE) break; /* @@ -705,7 +731,8 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, * periodic background writeout and we are below the * background dirty threshold, don't do anything */ - if (for_kupdate && nr_pages <= 0 && !over_bground_thresh()) + if (args->for_kupdate && args->nr_pages <= 0 && + !over_bground_thresh()) break; wbc.more_io = 0; @@ -713,7 +740,7 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; writeback_inodes_wb(wb, &wbc); - nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; + args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* @@ -731,7 +758,11 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, /* * Return the next bdi_work struct that hasn't been processed by this - * wb thread yet + * wb thread yet. ->seen is initially set for each thread that exists + * for this device, when a thread first notices a piece of work it + * clears its bit. Depending on writeback type, the thread will notify + * completion on either receiving the work (WB_SYNC_NONE) or after + * it is done (WB_SYNC_ALL). */ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) @@ -741,8 +772,9 @@ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, rcu_read_lock(); list_for_each_entry_rcu(work, &bdi->work_list, list) { - if (!test_and_clear_bit(wb->nr, &work->seen)) + if (!test_bit(wb->nr, &work->seen)) continue; + clear_bit(wb->nr, &work->seen); ret = work; break; @@ -767,8 +799,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - if (nr_pages) - return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1); + if (nr_pages) { + struct wb_writeback_args args = { + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + .for_kupdate = 1, + .range_cyclic = 1, + }; + + return wb_writeback(wb, &args); + } return 0; } @@ -780,35 +820,31 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; struct bdi_work *work; - long nr_pages, wrote = 0; + long wrote = 0; while ((work = get_next_work_item(bdi, wb)) != NULL) { - enum writeback_sync_modes sync_mode; - - nr_pages = work->nr_pages; + struct wb_writeback_args args = work->args; /* * Override sync mode, in case we must wait for completion */ if (force_wait) - work->sync_mode = sync_mode = WB_SYNC_ALL; - else - sync_mode = work->sync_mode; + work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; /* * If this isn't a data integrity operation, just notify * that we have seen this work and we are now starting it. */ - if (sync_mode == WB_SYNC_NONE) + if (args.sync_mode == WB_SYNC_NONE) wb_clear_pending(wb, work); - wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0); + wrote += wb_writeback(wb, &args); /* * This is a data integrity writeback, so only do the * notification when we have completed the work. */ - if (sync_mode == WB_SYNC_ALL) + if (args.sync_mode == WB_SYNC_ALL) wb_clear_pending(wb, work); } @@ -849,8 +885,7 @@ int bdi_writeback_task(struct bdi_writeback *wb) } wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(wait_jiffies); + schedule_timeout_interruptible(wait_jiffies); try_to_freeze(); } @@ -858,67 +893,28 @@ int bdi_writeback_task(struct bdi_writeback *wb) } /* - * Schedule writeback for all backing devices. Expensive! If this is a data - * integrity operation, writeback will be complete when this returns. If - * we are simply called for WB_SYNC_NONE, then writeback will merely be - * scheduled to run. + * Schedule writeback for all backing devices. This does WB_SYNC_NONE + * writeback, for integrity writeback see bdi_sync_writeback(). */ -static void bdi_writeback_all(struct writeback_control *wbc) +static void bdi_writeback_all(struct super_block *sb, long nr_pages) { - const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; + struct wb_writeback_args args = { + .sb = sb, + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + }; struct backing_dev_info *bdi; - struct bdi_work *work; - LIST_HEAD(list); - -restart: - spin_lock(&bdi_lock); - list_for_each_entry(bdi, &bdi_list, bdi_list) { - struct bdi_work *work; + rcu_read_lock(); + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - /* - * If work allocation fails, do the writes inline. We drop - * the lock and restart the list writeout. This should be OK, - * since this happens rarely and because the writeout should - * eventually make more free memory available. - */ - work = bdi_alloc_work(wbc); - if (!work) { - struct writeback_control __wbc; - - /* - * Not a data integrity writeout, just continue - */ - if (!must_wait) - continue; - - spin_unlock(&bdi_lock); - __wbc = *wbc; - __wbc.bdi = bdi; - writeback_inodes_wbc(&__wbc); - goto restart; - } - if (must_wait) - list_add_tail(&work->wait_list, &list); - - bdi_queue_work(bdi, work); + bdi_alloc_queue_work(bdi, &args); } - spin_unlock(&bdi_lock); - - /* - * If this is for WB_SYNC_ALL, wait for pending work to complete - * before returning. - */ - while (!list_empty(&list)) { - work = list_entry(list.next, struct bdi_work, wait_list); - list_del(&work->wait_list); - bdi_wait_on_work_clear(work); - call_rcu(&work->rcu_head, bdi_work_free); - } + rcu_read_unlock(); } /* @@ -927,17 +923,10 @@ restart: */ void wakeup_flusher_threads(long nr_pages) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .range_cyclic = 1, - }; - if (nr_pages == 0) nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); - wbc.nr_to_write = nr_pages; - bdi_writeback_all(&wbc); + bdi_writeback_all(NULL, nr_pages); } static noinline void block_dump___mark_inode_dirty(struct inode *inode) @@ -1084,7 +1073,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); * on the writer throttling path, and we get decent balancing between many * throttled threads: we don't want them all piling up on inode_sync_wait. */ -static void wait_sb_inodes(struct writeback_control *wbc) +static void wait_sb_inodes(struct super_block *sb) { struct inode *inode, *old_inode = NULL; @@ -1092,7 +1081,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ - WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount)); + WARN_ON(!rwsem_is_locked(&sb->s_umount)); spin_lock(&inode_lock); @@ -1103,7 +1092,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) * In which case, the inode may not be on the dirty list, but * we still have to wait for that writeout. */ - list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) { + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct address_space *mapping; if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) @@ -1143,14 +1132,8 @@ static void wait_sb_inodes(struct writeback_control *wbc) * for IO completion of submitted IO. The number of pages submitted is * returned. */ -long writeback_inodes_sb(struct super_block *sb) +void writeback_inodes_sb(struct super_block *sb) { - struct writeback_control wbc = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .range_start = 0, - .range_end = LLONG_MAX, - }; unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); long nr_to_write; @@ -1158,9 +1141,7 @@ long writeback_inodes_sb(struct super_block *sb) nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - wbc.nr_to_write = nr_to_write; - bdi_writeback_all(&wbc); - return nr_to_write - wbc.nr_to_write; + bdi_writeback_all(sb, nr_to_write); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1171,20 +1152,10 @@ EXPORT_SYMBOL(writeback_inodes_sb); * This function writes and waits on any dirty inode belonging to this * super_block. The number of pages synced is returned. */ -long sync_inodes_sb(struct super_block *sb) +void sync_inodes_sb(struct super_block *sb) { - struct writeback_control wbc = { - .sb = sb, - .sync_mode = WB_SYNC_ALL, - .range_start = 0, - .range_end = LLONG_MAX, - }; - long nr_to_write = LONG_MAX; /* doesn't actually matter */ - - wbc.nr_to_write = nr_to_write; - bdi_writeback_all(&wbc); - wait_sb_inodes(&wbc); - return nr_to_write - wbc.nr_to_write; + bdi_sync_writeback(sb->s_bdi, sb); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 99c99dfb037..3773fd63d2f 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, return simple_read_from_buffer(buf, len, ppos, tmp, size); } +static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos, unsigned val) +{ + char tmp[32]; + size_t size = sprintf(tmp, "%u\n", val); + + return simple_read_from_buffer(buf, len, ppos, tmp, size); +} + +static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, unsigned *val, + unsigned global_limit) +{ + unsigned long t; + char tmp[32]; + unsigned limit = (1 << 16) - 1; + int err; + + if (*ppos || count >= sizeof(tmp) - 1) + return -EINVAL; + + if (copy_from_user(tmp, buf, count)) + return -EINVAL; + + tmp[count] = '\0'; + + err = strict_strtoul(tmp, 0, &t); + if (err) + return err; + + if (!capable(CAP_SYS_ADMIN)) + limit = min(limit, global_limit); + + if (t > limit) + return -EINVAL; + + *val = t; + + return count; +} + +static ssize_t fuse_conn_max_background_read(struct file *file, + char __user *buf, size_t len, + loff_t *ppos) +{ + struct fuse_conn *fc; + unsigned val; + + fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + val = fc->max_background; + fuse_conn_put(fc); + + return fuse_conn_limit_read(file, buf, len, ppos, val); +} + +static ssize_t fuse_conn_max_background_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned val; + ssize_t ret; + + ret = fuse_conn_limit_write(file, buf, count, ppos, &val, + max_user_bgreq); + if (ret > 0) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fc->max_background = val; + fuse_conn_put(fc); + } + } + + return ret; +} + +static ssize_t fuse_conn_congestion_threshold_read(struct file *file, + char __user *buf, size_t len, + loff_t *ppos) +{ + struct fuse_conn *fc; + unsigned val; + + fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + val = fc->congestion_threshold; + fuse_conn_put(fc); + + return fuse_conn_limit_read(file, buf, len, ppos, val); +} + +static ssize_t fuse_conn_congestion_threshold_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned val; + ssize_t ret; + + ret = fuse_conn_limit_write(file, buf, count, ppos, &val, + max_user_congthresh); + if (ret > 0) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fc->congestion_threshold = val; + fuse_conn_put(fc); + } + } + + return ret; +} + static const struct file_operations fuse_ctl_abort_ops = { .open = nonseekable_open, .write = fuse_conn_abort_write, @@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = { .read = fuse_conn_waiting_read, }; +static const struct file_operations fuse_conn_max_background_ops = { + .open = nonseekable_open, + .read = fuse_conn_max_background_read, + .write = fuse_conn_max_background_write, +}; + +static const struct file_operations fuse_conn_congestion_threshold_ops = { + .open = nonseekable_open, + .read = fuse_conn_congestion_threshold_read, + .write = fuse_conn_congestion_threshold_write, +}; + static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, const char *name, @@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) goto err; if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, - NULL, &fuse_ctl_waiting_ops) || + NULL, &fuse_ctl_waiting_ops) || !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, - NULL, &fuse_ctl_abort_ops)) + NULL, &fuse_ctl_abort_ops) || + !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, + 1, NULL, &fuse_conn_max_background_ops) || + !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", + S_IFREG | 0600, 1, NULL, + &fuse_conn_congestion_threshold_ops)) goto err; return 0; @@ -156,7 +288,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) d_drop(dentry); dput(dentry); } - fuse_control_sb->s_root->d_inode->i_nlink--; + drop_nlink(fuse_control_sb->s_root->d_inode); } static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6484eb75acd..51d9e33d634 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, BLK_RW_SYNC); set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0fa..fc9c79feb5f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -38,7 +32,7 @@ #define FUSE_NAME_MAX 1024 /** Number of dentries for each connection in the control filesystem */ -#define FUSE_CTL_NUM_DENTRIES 3 +#define FUSE_CTL_NUM_DENTRIES 5 /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no @@ -55,6 +49,10 @@ extern struct list_head fuse_conn_list; /** Global mutex protecting fuse_conn_list and the control filesystem */ extern struct mutex fuse_mutex; +/** Module parameters */ +extern unsigned max_user_bgreq; +extern unsigned max_user_congthresh; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -349,6 +347,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4567db6f943..6da947daabd 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -14,6 +14,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/parser.h> #include <linux/statfs.h> #include <linux/random.h> @@ -28,10 +29,34 @@ static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); +static int set_global_limit(const char *val, struct kernel_param *kp); + +unsigned max_user_bgreq; +module_param_call(max_user_bgreq, set_global_limit, param_get_uint, + &max_user_bgreq, 0644); +__MODULE_PARM_TYPE(max_user_bgreq, "uint"); +MODULE_PARM_DESC(max_user_bgreq, + "Global limit for the maximum number of backgrounded requests an " + "unprivileged user can set"); + +unsigned max_user_congthresh; +module_param_call(max_user_congthresh, set_global_limit, param_get_uint, + &max_user_congthresh, 0644); +__MODULE_PARM_TYPE(max_user_congthresh, "uint"); +MODULE_PARM_DESC(max_user_congthresh, + "Global limit for the maximum congestion threshold an " + "unprivileged user can set"); + #define FUSE_SUPER_MAGIC 0x65735546 #define FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +542,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -727,6 +754,54 @@ static const struct super_operations fuse_super_operations = { .show_options = fuse_show_options, }; +static void sanitize_global_limit(unsigned *limit) +{ + if (*limit == 0) + *limit = ((num_physpages << PAGE_SHIFT) >> 13) / + sizeof(struct fuse_req); + + if (*limit >= 1 << 16) + *limit = (1 << 16) - 1; +} + +static int set_global_limit(const char *val, struct kernel_param *kp) +{ + int rv; + + rv = param_set_uint(val, kp); + if (rv) + return rv; + + sanitize_global_limit((unsigned *)kp->arg); + + return 0; +} + +static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) +{ + int cap_sys_admin = capable(CAP_SYS_ADMIN); + + if (arg->minor < 13) + return; + + sanitize_global_limit(&max_user_bgreq); + sanitize_global_limit(&max_user_congthresh); + + if (arg->max_background) { + fc->max_background = arg->max_background; + + if (!cap_sys_admin && fc->max_background > max_user_bgreq) + fc->max_background = max_user_bgreq; + } + if (arg->congestion_threshold) { + fc->congestion_threshold = arg->congestion_threshold; + + if (!cap_sys_admin && + fc->congestion_threshold > max_user_congthresh) + fc->congestion_threshold = max_user_congthresh; + } +} + static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_init_out *arg = &req->misc.init_out; @@ -736,6 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned long ra_pages; + process_init_limits(fc, arg); + if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) @@ -894,6 +971,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_put_conn; + sb->s_bdi = &fc->bdi; + /* Handle umasking inside the fuse code */ if (sb->s_flags & MS_POSIXACL) fc->dont_mask = 1; @@ -1148,6 +1227,9 @@ static int __init fuse_init(void) if (res) goto err_sysfs_cleanup; + sanitize_global_limit(&max_user_bgreq); + sanitize_global_limit(&max_user_congthresh); + return 0; err_sysfs_cleanup: diff --git a/fs/inode.c b/fs/inode.c index ae7b67e4866..b2ba83d2c4e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -182,9 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) if (sb->s_bdev) { struct backing_dev_info *bdi; - bdi = sb->s_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; mapping->backing_dev_info = bdi; } inode->i_private = NULL; diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 61f32f3868c..b0435dd0654 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -456,7 +456,7 @@ int cleanup_journal_tail(journal_t *journal) { transaction_t * transaction; tid_t first_tid; - unsigned long blocknr, freed; + unsigned int blocknr, freed; if (is_journal_aborted(journal)) return 1; @@ -502,8 +502,8 @@ int cleanup_journal_tail(journal_t *journal) freed = freed + journal->j_last - journal->j_first; jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " - "freeing %lu\n", + "Cleaning journal tail from %d to %d (offset %u), " + "freeing %u\n", journal->j_tail_sequence, first_tid, blocknr, freed); journal->j_free += freed; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 618e21c0b7a..4bd882548c4 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal) int bufs; int flags; int err; - unsigned long blocknr; + unsigned int blocknr; ktime_t start_time; u64 commit_time; char *tagp = NULL; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f96f85092d1..bd3c073b485 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -276,7 +276,7 @@ static void journal_kill_thread(journal_t *journal) int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - unsigned long blocknr) + unsigned int blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -567,9 +567,9 @@ int log_wait_commit(journal_t *journal, tid_t tid) * Log buffer allocation routines: */ -int journal_next_log_block(journal_t *journal, unsigned long *retp) +int journal_next_log_block(journal_t *journal, unsigned int *retp) { - unsigned long blocknr; + unsigned int blocknr; spin_lock(&journal->j_state_lock); J_ASSERT(journal->j_free > 1); @@ -590,11 +590,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, - unsigned long *retp) +int journal_bmap(journal_t *journal, unsigned int blocknr, + unsigned int *retp) { int err = 0; - unsigned long ret; + unsigned int ret; if (journal->j_inode) { ret = bmap(journal->j_inode, blocknr); @@ -604,7 +604,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, char b[BDEVNAME_SIZE]; printk(KERN_ALERT "%s: journal block not found " - "at offset %lu on %s\n", + "at offset %u on %s\n", __func__, blocknr, bdevname(journal->j_dev, b)); @@ -630,7 +630,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, struct journal_head *journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; - unsigned long blocknr; + unsigned int blocknr; int err; err = journal_next_log_block(journal, &blocknr); @@ -774,7 +774,7 @@ journal_t * journal_init_inode (struct inode *inode) journal_t *journal = journal_init_common(); int err; int n; - unsigned long blocknr; + unsigned int blocknr; if (!journal) return NULL; @@ -846,12 +846,12 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned long first, last; + unsigned int first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { - printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n", + printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n", first, last); journal_fail_superblock(journal); return -EINVAL; @@ -885,7 +885,7 @@ static int journal_reset(journal_t *journal) **/ int journal_create(journal_t *journal) { - unsigned long blocknr; + unsigned int blocknr; struct buffer_head *bh; journal_superblock_t *sb; int i, err; @@ -969,14 +969,14 @@ void journal_update_superblock(journal_t *journal, int wait) if (sb->s_start == 0 && journal->j_tail_sequence == journal->j_transaction_sequence) { jbd_debug(1,"JBD: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", + "(start %u, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); goto out; } spin_lock(&journal->j_state_lock); - jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", + jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); @@ -1371,7 +1371,7 @@ int journal_flush(journal_t *journal) { int err = 0; transaction_t *transaction = NULL; - unsigned long old_tail; + unsigned int old_tail; spin_lock(&journal->j_state_lock); diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index db5e982c5dd..cb1a49ae605 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start) { int err; unsigned int max, nbufs, next; - unsigned long blocknr; + unsigned int blocknr; struct buffer_head *bh; struct buffer_head * bufs[MAXBUF]; @@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset) { int err; - unsigned long blocknr; + unsigned int blocknr; struct buffer_head *bh; *bhp = NULL; @@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { unsigned int first_commit_ID, next_commit_ID; - unsigned long next_log_block; + unsigned int next_log_block; int err, success = 0; journal_superblock_t * sb; journal_header_t * tmp; @@ -367,14 +367,14 @@ static int do_one_pass(journal_t *journal, if (tid_geq(next_commit_ID, info->end_transaction)) break; - jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", + jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n", next_commit_ID, next_log_block, journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit * record. */ - jbd_debug(3, "JBD: checking block %ld\n", next_log_block); + jbd_debug(3, "JBD: checking block %u\n", next_log_block); err = jread(&bh, journal, next_log_block); if (err) goto failed; @@ -429,7 +429,7 @@ static int do_one_pass(journal_t *journal, tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) <= journal->j_blocksize) { - unsigned long io_block; + unsigned int io_block; tag = (journal_block_tag_t *) tagp; flags = be32_to_cpu(tag->t_flags); @@ -443,10 +443,10 @@ static int do_one_pass(journal_t *journal, success = err; printk (KERN_ERR "JBD: IO error %d recovering " - "block %ld in log\n", + "block %u in log\n", err, io_block); } else { - unsigned long blocknr; + unsigned int blocknr; J_ASSERT(obh != NULL); blocknr = be32_to_cpu(tag->t_blocknr); @@ -581,7 +581,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, max = be32_to_cpu(header->r_count); while (offset < max) { - unsigned long blocknr; + unsigned int blocknr; int err; blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index da6cd9bdaab..ad717328343 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -101,7 +101,7 @@ struct jbd_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ - unsigned long blocknr; + unsigned int blocknr; }; @@ -126,7 +126,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); /* Utility functions to maintain the revoke table */ /* Borrowed from buffer.c: this is a tried and tested block hash function */ -static inline int hash(journal_t *journal, unsigned long block) +static inline int hash(journal_t *journal, unsigned int block) { struct jbd_revoke_table_s *table = journal->j_revoke; int hash_shift = table->hash_shift; @@ -136,7 +136,7 @@ static inline int hash(journal_t *journal, unsigned long block) (block << (hash_shift - 12))) & (table->hash_size - 1); } -static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, +static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, tid_t seq) { struct list_head *hash_list; @@ -166,7 +166,7 @@ oom: /* Find a revoke record in the journal's hash table. */ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, - unsigned long blocknr) + unsigned int blocknr) { struct list_head *hash_list; struct jbd_revoke_record_s *record; @@ -332,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal) * by one. */ -int journal_revoke(handle_t *handle, unsigned long blocknr, +int journal_revoke(handle_t *handle, unsigned int blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -401,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, } } - jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); + jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); BUFFER_TRACE(bh_in, "exit"); @@ -644,7 +644,7 @@ static void flush_descriptor(journal_t *journal, */ int journal_set_revoke(journal_t *journal, - unsigned long blocknr, + unsigned int blocknr, tid_t sequence) { struct jbd_revoke_record_s *record; @@ -668,7 +668,7 @@ int journal_set_revoke(journal_t *journal, */ int journal_test_revoke(journal_t *journal, - unsigned long blocknr, + unsigned int blocknr, tid_t sequence) { struct jbd_revoke_record_s *record; diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index c03ac11f74b..006f9ad838a 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -56,7 +56,8 @@ get_transaction(journal_t *journal, transaction_t *transaction) spin_lock_init(&transaction->t_handle_lock); /* Set up the commit timer for the new transaction. */ - journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); + journal->j_commit_timer.expires = + round_jiffies_up(transaction->t_expires); add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); @@ -228,6 +229,8 @@ repeat_locked: __log_space_left(journal)); spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); + + lock_map_acquire(&handle->h_lockdep_map); out: if (unlikely(new_transaction)) /* It's usually NULL */ kfree(new_transaction); @@ -292,9 +295,6 @@ handle_t *journal_start(journal_t *journal, int nblocks) handle = ERR_PTR(err); goto out; } - - lock_map_acquire(&handle->h_lockdep_map); - out: return handle; } @@ -416,6 +416,7 @@ int journal_restart(handle_t *handle, int nblocks) __log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); + lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; ret = start_this_handle(journal, handle); return ret; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7b4088b2364..0df600e9162 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -220,7 +220,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping) .nr_to_write = mapping->nrpages * 2, .range_start = 0, .range_end = i_size_read(mapping->host), - .for_writepages = 1, }; ret = generic_writepages(mapping, &wbc); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 867f7050453..de935692d40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1918,6 +1918,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) if (server->flags & NFS_MOUNT_NOAC) sb->s_flags |= MS_SYNCHRONOUS; + sb->s_bdi = &server->backing_dev_info; + nfs_super_set_maxbytes(sb, server->maxfilesize); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 120acadc6a8..53eb26c16b5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1490,7 +1490,6 @@ static int nfs_write_mapping(struct address_space *mapping, int how) .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, - .for_writepages = 1, }; return __nfs_write_mapping(mapping, &wbc, how); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d4168e269c5..ad391a8c3e7 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -591,9 +591,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; nilfs->ns_bdi = bdi ? : &default_backing_dev_info; /* Finding last segment */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 619ba99dfe3..fbeaddf595d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -312,7 +312,7 @@ static struct attribute_group part_attr_group = { .attrs = part_attrs, }; -static struct attribute_group *part_attr_groups[] = { +static const struct attribute_group *part_attr_groups[] = { &part_attr_group, #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, diff --git a/fs/super.c b/fs/super.c index 9cda337ddae..b03fea8fbfb 100644 --- a/fs/super.c +++ b/fs/super.c @@ -707,6 +707,12 @@ static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; + + /* + * We set the bdi here to the queue backing, file systems can + * overwrite this in ->fill_super() + */ + s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/sync.c b/fs/sync.c index 192340930bb..c08467a5d7c 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,6 +27,13 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { + /* + * This should be safe, as we require bdi backing to actually + * write out data in the first place + */ + if (!sb->s_bdi) + return 0; + /* Avoid doing twice syncing and cache pruning for quota sync */ if (!wait) { writeout_quota_sb(sb, -1); @@ -101,7 +108,7 @@ restart: spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY) && sb->s_root) + if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) __sync_filesystem(sb, wait); up_read(&sb->s_umount); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 1c8991b0db1..ee1ce68fd98 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -54,29 +54,15 @@ * @nr_to_write: how many dirty pages to write-back * * This function shrinks UBIFS liability by means of writing back some amount - * of dirty inodes and their pages. Returns the amount of pages which were - * written back. The returned value does not include dirty inodes which were - * synchronized. + * of dirty inodes and their pages. * * Note, this function synchronizes even VFS inodes which are locked * (@i_mutex) by the caller of the budgeting function, because write-back does * not touch @i_mutex. */ -static int shrink_liability(struct ubifs_info *c, int nr_to_write) +static void shrink_liability(struct ubifs_info *c, int nr_to_write) { - int nr_written; - - nr_written = writeback_inodes_sb(c->vfs_sb); - if (!nr_written) { - /* - * Re-try again but wait on pages/inodes which are being - * written-back concurrently (e.g., by pdflush). - */ - nr_written = sync_inodes_sb(c->vfs_sb); - } - - dbg_budg("%d pages were written back", nr_written); - return nr_written; + writeback_inodes_sb(c->vfs_sb); } /** diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 51763aa8f4d..c4af069df1a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1980,6 +1980,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_bdi; + sb->s_bdi = &c->bdi; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index aecf2519db7..d5e5559e31d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -216,7 +216,6 @@ xfs_setfilesize( if (ip->i_d.di_size < isize) { ip->i_d.di_size = isize; ip->i_update_core = 1; - ip->i_update_size = 1; xfs_mark_inode_dirty_sync(ip); } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 0542fd50764..988d8f87bc0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -172,12 +172,21 @@ xfs_file_release( */ STATIC int xfs_file_fsync( - struct file *filp, - struct dentry *dentry, - int datasync) + struct file *file, + struct dentry *dentry, + int datasync) { - xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode)); + struct inode *inode = dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + int error; + + /* capture size updates in I/O completion before writing the inode. */ + error = filemap_fdatawait(inode->i_mapping); + if (error) + return error; + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + return -xfs_fsync(ip); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 6c32f1d63d8..da0159d99f8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -43,7 +43,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index fde63a3c4ec..49e4a6aea73 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -812,19 +812,21 @@ write_retry: /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + loff_t end = pos + ret - 1; int error2; xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); - error2 = filemap_write_and_wait_range(mapping, pos, - pos + ret - 1); + + error2 = filemap_write_and_wait_range(mapping, pos, end); if (!error) error = error2; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(xip, iolock); - error2 = xfs_write_sync_logforce(mp, xip); + + error2 = xfs_fsync(xip); if (!error) error = error2; } diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index c3526d445f6..76fdc586193 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -20,16 +20,9 @@ DEFINE_PER_CPU(struct xfsstats, xfsstats); -STATIC int -xfs_read_xfsstats( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xfs_stat_proc_show(struct seq_file *m, void *v) { - int c, i, j, len, val; + int c, i, j, val; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -60,18 +53,18 @@ xfs_read_xfsstats( }; /* Loop over all stats groups */ - for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { - len += sprintf(buffer + len, "%s", xstats[i].desc); + for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { + seq_printf(m, "%s", xstats[i].desc); /* inner loop does each group */ while (j < xstats[i].endpoint) { val = 0; /* sum over all cpus */ for_each_possible_cpu(c) val += *(((__u32*)&per_cpu(xfsstats, c) + j)); - len += sprintf(buffer + len, " %u", val); + seq_printf(m, " %u", val); j++; } - buffer[len++] = '\n'; + seq_putc(m, '\n'); } /* extra precision counters */ for_each_possible_cpu(i) { @@ -80,36 +73,38 @@ xfs_read_xfsstats( xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; } - len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", + seq_printf(m, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - len += sprintf(buffer + len, "debug %u\n", + seq_printf(m, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif + return 0; +} - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; +static int xfs_stat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xfs_stat_proc_show, NULL); } +static const struct file_operations xfs_stat_proc_fops = { + .owner = THIS_MODULE, + .open = xfs_stat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) goto out; - if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, - xfs_read_xfsstats, NULL)) + if (!proc_create("fs/xfs/stat", 0, NULL, + &xfs_stat_proc_fops)) goto out_remove_entry; return 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a220d36f789..5d7c60ac77b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -579,15 +579,19 @@ xfs_showargs( else if (mp->m_qflags & XFS_UQUOTA_ACCT) seq_puts(m, "," MNTOPT_UQUOTANOENF); - if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else if (mp->m_qflags & XFS_PQUOTA_ACCT) - seq_puts(m, "," MNTOPT_PQUOTANOENF); - - if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else if (mp->m_qflags & XFS_GQUOTA_ACCT) - seq_puts(m, "," MNTOPT_GQUOTANOENF); + /* Either project or group quotas can be active, not both */ + + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else + seq_puts(m, "," MNTOPT_GQUOTANOENF); + } if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, "," MNTOPT_NOQUOTA); @@ -687,7 +691,7 @@ xfs_barrier_test( return error; } -void +STATIC void xfs_mountfs_check_barriers(xfs_mount_t *mp) { int error; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 98ef624d9ba..320be6aea49 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -749,21 +749,6 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } -void -xfs_inode_clear_reclaim_tag( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); - - read_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - __xfs_inode_clear_reclaim_tag(mp, pag, ip); - spin_unlock(&ip->i_flags_lock); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); -} - STATIC int xfs_reclaim_inode_now( struct xfs_inode *ip, diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 59120602588..27920eb7a82 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); -void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a..83e7ea3e25f 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -48,50 +48,34 @@ struct xqmstats xqmstats; -STATIC int -xfs_qm_read_xfsquota( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xqm_proc_show(struct seq_file *m, void *v) { - int len; - /* maximum; incore; ratio free to inuse; freelist */ - len = sprintf(buffer, "%d\t%d\t%d\t%u\n", + seq_printf(m, "%d\t%d\t%d\t%u\n", ndquot, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); - - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; + return 0; } -STATIC int -xfs_qm_read_stats( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) +static int xqm_proc_open(struct inode *inode, struct file *file) { - int len; + return single_open(file, xqm_proc_show, NULL); +} + +static const struct file_operations xqm_proc_fops = { + .owner = THIS_MODULE, + .open = xqm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +static int xqmstat_proc_show(struct seq_file *m, void *v) +{ /* quota performance statistics */ - len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", + seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", xqmstats.xs_qm_dqreclaims, xqmstats.xs_qm_dqreclaim_misses, xqmstats.xs_qm_dquot_dups, @@ -100,25 +84,27 @@ xfs_qm_read_stats( xqmstats.xs_qm_dqwants, xqmstats.xs_qm_dqshake_reclaims, xqmstats.xs_qm_dqinact_reclaims); + return 0; +} - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; +static int xqmstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqmstat_proc_show, NULL); } +static const struct file_operations xqmstat_proc_fops = { + .owner = THIS_MODULE, + .open = xqmstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void xfs_qm_init_procfs(void) { - create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); - create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); + proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); + proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); } void diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f24b50b68d0..a5d54bf4931 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -198,6 +198,15 @@ typedef struct xfs_perag xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ xfs_perag_busy_t *pagb_list; /* unstable blocks */ + + /* + * Inode allocation search lookup optimisation. + * If the pagino matches, the search for new inodes + * doesn't need to search the near ones again straight away + */ + xfs_agino_t pagl_pagino; + xfs_agino_t pagl_leftrec; + xfs_agino_t pagl_rightrec; #ifdef __KERNEL__ spinlock_t pagb_lock; /* lock for pagb_list */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8ee5b5a76a2..8971fb09d38 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3713,7 +3713,7 @@ done: * entry (null if none). Else, *lastxp will be set to the index * of the found entry; *gotp will contain the entry. */ -xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ xfs_bmap_search_multi_extents( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fileoff_t bno, /* block number searched for */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 1b8ff9256bd..56f62d2edc3 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -392,17 +392,6 @@ xfs_bmap_count_blocks( int whichfork, int *count); -/* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. - */ -xfs_bmbt_rec_host_t * -xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, - xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); - #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5c1ade06578..eb7b702d069 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -202,16 +202,6 @@ xfs_bmbt_get_state( ext_flag); } -/* Endian flipping versions of the bmbt extraction functions */ -void -xfs_bmbt_disk_get_all( - xfs_bmbt_rec_t *r, - xfs_bmbt_irec_t *s) -{ - __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), - get_unaligned_be64(&r->l1), s); -} - /* * Extract the blockcount field from an on disk bmap extent record. */ @@ -816,6 +806,16 @@ xfs_bmbt_trace_key( *l1 = 0; } +/* Endian flipping versions of the bmbt extraction functions */ +STATIC void +xfs_bmbt_disk_get_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s) +{ + __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), + get_unaligned_be64(&r->l1), s); +} + STATIC void xfs_bmbt_trace_record( struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0e8df007615..5549d495947 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); -extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 26717388acf..52b5f14d0c3 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -646,46 +646,6 @@ xfs_btree_read_bufl( } /* - * Get a buffer for the block, return it read in. - * Short-form addressing. - */ -int /* error */ -xfs_btree_read_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock, /* lock flags for read_buf */ - xfs_buf_t **bpp, /* buffer for agno/agbno */ - int refval) /* ref count value for buffer */ -{ - xfs_buf_t *bp; /* return value */ - xfs_daddr_t d; /* real disk block address */ - int error; - - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp))) { - return error; - } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp != NULL) { - switch (refval) { - case XFS_ALLOC_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); - break; - case XFS_INO_BTREE_REF: - XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); - break; - } - } - *bpp = bp; - return 0; -} - -/* * Read-ahead the block, don't wait for it, don't return a buffer. * Long-form addressing. */ @@ -2951,7 +2911,7 @@ error0: * inode we have to copy the single block it was pointing to into the * inode. */ -int +STATIC int xfs_btree_kill_iroot( struct xfs_btree_cur *cur) { diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 4f852b735b9..7fa07062bdd 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -379,20 +379,6 @@ xfs_btree_read_bufl( int refval);/* ref count value for buffer */ /* - * Get a buffer for the block, return it read in. - * Short-form addressing. - */ -int /* error */ -xfs_btree_read_bufs( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock, /* lock flags for read_buf */ - struct xfs_buf **bpp, /* buffer for agno/agbno */ - int refval);/* ref count value for buffer */ - -/* * Read-ahead the block, don't wait for it, don't return a buffer. * Long-form addressing. */ @@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); -int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3120a3a5e20..ab64f3efb43 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment( } /* - * Lookup the record equal to ino in the btree given by cur. - */ -STATIC int /* error */ -xfs_inobt_lookup_eq( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. + * Lookup a record by ino in the btree given by cur. */ int /* error */ -xfs_inobt_lookup_ge( +xfs_inobt_lookup( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ + xfs_lookup_t dir, /* <=, >=, == */ int *stat) /* success/failure */ { cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); + cur->bc_rec.i.ir_freecount = 0; + cur->bc_rec.i.ir_free = 0; + return xfs_btree_lookup(cur, dir, stat); } /* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_le( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); -} - -/* - * Update the record referred to by cur to the value given - * by [ino, fcnt, free]. + * Update the record referred to by cur to the value given. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int /* error */ xfs_inobt_update( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free) /* free inode mask */ + xfs_inobt_rec_incore_t *irec) /* btree record */ { union xfs_btree_rec rec; - rec.inobt.ir_startino = cpu_to_be32(ino); - rec.inobt.ir_freecount = cpu_to_be32(fcnt); - rec.inobt.ir_free = cpu_to_be64(free); + rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); + rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); + rec.inobt.ir_free = cpu_to_be64(irec->ir_free); return xfs_btree_update(cur, &rec); } @@ -135,9 +95,7 @@ xfs_inobt_update( int /* error */ xfs_inobt_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t *ino, /* output: starting inode of chunk */ - __int32_t *fcnt, /* output: number of free inodes */ - xfs_inofree_t *free, /* output: free inode mask */ + xfs_inobt_rec_incore_t *irec, /* btree record */ int *stat) /* output: success/failure */ { union xfs_btree_rec *rec; @@ -145,14 +103,136 @@ xfs_inobt_get_rec( error = xfs_btree_get_rec(cur, &rec, stat); if (!error && *stat == 1) { - *ino = be32_to_cpu(rec->inobt.ir_startino); - *fcnt = be32_to_cpu(rec->inobt.ir_freecount); - *free = be64_to_cpu(rec->inobt.ir_free); + irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); + irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); + irec->ir_free = be64_to_cpu(rec->inobt.ir_free); } return error; } /* + * Verify that the number of free inodes in the AGI is correct. + */ +#ifdef DEBUG +STATIC int +xfs_check_agi_freecount( + struct xfs_btree_cur *cur, + struct xfs_agi *agi) +{ + if (cur->bc_nlevels == 1) { + xfs_inobt_rec_incore_t rec; + int freecount = 0; + int error; + int i; + + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + + do { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + + if (i) { + freecount += rec.ir_freecount; + error = xfs_btree_increment(cur, 0, &i); + if (error) + return error; + } + } while (i == 1); + + if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) + ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); + } + return 0; +} +#else +#define xfs_check_agi_freecount(cur, agi) 0 +#endif + +/* + * Initialise a new set of inodes. + */ +STATIC void +xfs_ialloc_inode_init( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_agblock_t length, + unsigned int gen) +{ + struct xfs_buf *fbuf; + struct xfs_dinode *free; + int blks_per_cluster, nbufs, ninodes; + int version; + int i, j; + xfs_daddr_t d; + + /* + * Loop over the new block(s), filling in the inodes. + * For small block sizes, manipulate the inodes in buffers + * which are multiples of the blocks size. + */ + if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { + blks_per_cluster = 1; + nbufs = length; + ninodes = mp->m_sb.sb_inopblock; + } else { + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / + mp->m_sb.sb_blocksize; + nbufs = length / blks_per_cluster; + ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; + } + + /* + * Figure out what version number to use in the inodes we create. + * If the superblock version has caught up to the one that supports + * the new inode format, then use the new inode version. Otherwise + * use the old version so that old kernels will continue to be + * able to use the file system. + */ + if (xfs_sb_version_hasnlink(&mp->m_sb)) + version = 2; + else + version = 1; + + for (j = 0; j < nbufs; j++) { + /* + * Get the block. + */ + d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); + fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize * blks_per_cluster, + XFS_BUF_LOCK); + ASSERT(fbuf); + ASSERT(!XFS_BUF_GETERROR(fbuf)); + + /* + * Initialize all inodes in this buffer and then log them. + * + * XXX: It would be much better if we had just one transaction + * to log a whole cluster of inodes instead of all the + * individual transactions causing a lot of log traffic. + */ + xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); + for (i = 0; i < ninodes; i++) { + int ioffset = i << mp->m_sb.sb_inodelog; + uint isize = sizeof(struct xfs_dinode); + + free = xfs_make_iptr(mp, fbuf, i); + free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + free->di_version = version; + free->di_gen = cpu_to_be32(gen); + free->di_next_unlinked = cpu_to_be32(NULLAGINO); + xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); + } + xfs_trans_inode_alloc_buf(tp, fbuf); + } +} + +/* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ @@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc( { xfs_agi_t *agi; /* allocation group header */ xfs_alloc_arg_t args; /* allocation argument structure */ - int blks_per_cluster; /* fs blocks per inode cluster */ xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_daddr_t d; /* disk addr of buffer */ xfs_agnumber_t agno; int error; - xfs_buf_t *fbuf; /* new free inodes' buffer */ - xfs_dinode_t *free; /* new free inode structure */ - int i; /* inode counter */ - int j; /* block counter */ - int nbufs; /* num bufs of new inodes */ + int i; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ - int ninodes; /* num inodes per buf */ xfs_agino_t thisino; /* current inode number, for loop */ - int version; /* inode version number to use */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ - unsigned int gen; args.tp = tp; args.mp = tp->t_mountp; @@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc( */ agi = XFS_BUF_TO_AGI(agbp); newino = be32_to_cpu(agi->agi_newino); + agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + XFS_IALLOC_BLOCKS(args.mp); if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; args.mod = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; @@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc( * For now, just allocate blocks up front. */ args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); /* * Allocate a fixed-size extent of inodes. */ @@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc( if (isaligned && args.fsbno == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, - be32_to_cpu(agi->agi_seqno), args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); args.alignment = xfs_ialloc_cluster_alignment(&args); if ((error = xfs_alloc_vextent(&args))) return error; @@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc( return 0; } ASSERT(args.len == args.minlen); - /* - * Convert the results. - */ - newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); - /* - * Loop over the new block(s), filling in the inodes. - * For small block sizes, manipulate the inodes in buffers - * which are multiples of the blocks size. - */ - if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { - blks_per_cluster = 1; - nbufs = (int)args.len; - ninodes = args.mp->m_sb.sb_inopblock; - } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / - args.mp->m_sb.sb_blocksize; - nbufs = (int)args.len / blks_per_cluster; - ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; - } - /* - * Figure out what version number to use in the inodes we create. - * If the superblock version has caught up to the one that supports - * the new inode format, then use the new inode version. Otherwise - * use the old version so that old kernels will continue to be - * able to use the file system. - */ - if (xfs_sb_version_hasnlink(&args.mp->m_sb)) - version = 2; - else - version = 1; /* + * Stamp and write the inode buffers. + * * Seed the new inode cluster with a random generation number. This * prevents short-term reuse of generation numbers if a chunk is * freed and then immediately reallocated. We use random numbers * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - gen = random32(); - for (j = 0; j < nbufs; j++) { - /* - * Get the block. - */ - d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), - args.agbno + (j * blks_per_cluster)); - fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, - args.mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); - ASSERT(fbuf); - ASSERT(!XFS_BUF_GETERROR(fbuf)); + xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, + random32()); - /* - * Initialize all inodes in this buffer and then log them. - * - * XXX: It would be much better if we had just one transaction to - * log a whole cluster of inodes instead of all the individual - * transactions causing a lot of log traffic. - */ - xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); - for (i = 0; i < ninodes; i++) { - int ioffset = i << args.mp->m_sb.sb_inodelog; - uint isize = sizeof(struct xfs_dinode); - - free = xfs_make_iptr(args.mp, fbuf, i); - free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - free->di_version = version; - free->di_gen = cpu_to_be32(gen); - free->di_next_unlinked = cpu_to_be32(NULLAGINO); - xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); - } - xfs_trans_inode_alloc_buf(tp, fbuf); - } + /* + * Convert the results. + */ + newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - agno = be32_to_cpu(agi->agi_seqno); down_read(&args.mp->m_peraglock); args.mp->m_perag[agno].pagi_freecount += newlen; up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); + /* * Insert records describing the new inode chunk into the btree. */ @@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc( for (thisino = newino; thisino < newino + newlen; thisino += XFS_INODES_PER_CHUNK) { - if ((error = xfs_inobt_lookup_eq(cur, thisino, - XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { + cur->bc_rec.i.ir_startino = thisino; + cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; + cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; + error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); + if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } ASSERT(i == 0); - if ((error = xfs_btree_insert(cur, &i))) { + error = xfs_btree_insert(cur, &i); + if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } @@ -539,6 +557,62 @@ nextag: } /* + * Try to retrieve the next record to the left/right from the current one. + */ +STATIC int +xfs_ialloc_next_rec( + struct xfs_btree_cur *cur, + xfs_inobt_rec_incore_t *rec, + int *done, + int left) +{ + int error; + int i; + + if (left) + error = xfs_btree_decrement(cur, 0, &i); + else + error = xfs_btree_increment(cur, 0, &i); + + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +STATIC int +xfs_ialloc_get_rec( + struct xfs_btree_cur *cur, + xfs_agino_t agino, + xfs_inobt_rec_incore_t *rec, + int *done, + int left) +{ + int error; + int i; + + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + *done = !i; + if (i) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + } + + return 0; +} + +/* * Visible inode allocation functions. */ @@ -592,8 +666,8 @@ xfs_dialloc( int j; /* result code */ xfs_mount_t *mp; /* file system mount structure */ int offset; /* index of inode in chunk */ - xfs_agino_t pagino; /* parent's a.g. relative inode # */ - xfs_agnumber_t pagno; /* parent's allocation group number */ + xfs_agino_t pagino; /* parent's AG relative inode # */ + xfs_agnumber_t pagno; /* parent's AG number */ xfs_inobt_rec_incore_t rec; /* inode allocation record */ xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ @@ -716,6 +790,8 @@ nextag: */ agno = tagno; *IO_agbp = NULL; + + restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); /* * If pagino is 0 (this is the root inode allocation) use newino. @@ -723,220 +799,199 @@ nextag: */ if (!pagino) pagino = be32_to_cpu(agi->agi_newino); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } while (i == 1); + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif /* - * If in the same a.g. as the parent, try to get near the parent. + * If in the same AG as the parent, try to get near the parent. */ if (pagno == agno) { - if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) + xfs_perag_t *pag = &mp->m_perag[agno]; + int doneleft; /* done, to the left */ + int doneright; /* done, to the right */ + int searchdistance = 10; + + error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_inobt_get_rec(cur, &rec, &j); + if (error) goto error0; - if (i != 0 && - (error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &j)) == 0 && - j == 1 && - rec.ir_freecount > 0) { + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + if (rec.ir_freecount > 0) { /* * Found a free inode in the same chunk - * as parent, done. + * as the parent, done. */ + goto alloc_inode; } + + + /* + * In the same AG as parent, but parent's chunk is full. + */ + + /* duplicate the cursor, search left & right simultaneously */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + /* - * In the same a.g. as parent, but parent's chunk is full. + * Skip to last blocks looked up if same parent inode. */ - else { - int doneleft; /* done, to the left */ - int doneright; /* done, to the right */ + if (pagino != NULLAGINO && + pag->pagl_pagino == pagino && + pag->pagl_leftrec != NULLAGINO && + pag->pagl_rightrec != NULLAGINO) { + error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, + &trec, &doneleft, 1); + if (error) + goto error1; + error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, + &rec, &doneright, 0); if (error) - goto error0; - ASSERT(i == 1); - ASSERT(j == 1); - /* - * Duplicate the cursor, search left & right - * simultaneously. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - goto error0; - /* - * Search left with tcur, back up 1 record. - */ - if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; - doneleft = !i; - if (!doneleft) { - if ((error = xfs_inobt_get_rec(tcur, - &trec.ir_startino, - &trec.ir_freecount, - &trec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, error1); - } - /* - * Search right with cur, go forward 1 record. - */ - if ((error = xfs_btree_increment(cur, 0, &i))) + } else { + /* search left with tcur, back up 1 record */ + error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); + if (error) goto error1; - doneright = !i; - if (!doneright) { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, error1); - } - /* - * Loop until we find the closest inode chunk - * with a free one. - */ - while (!doneleft || !doneright) { - int useleft; /* using left inode - chunk this time */ + /* search right with cur, go forward 1 record. */ + error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); + if (error) + goto error1; + } + + /* + * Loop until we find an inode chunk with a free inode. + */ + while (!doneleft || !doneright) { + int useleft; /* using left inode chunk this time */ + + if (!--searchdistance) { /* - * Figure out which block is closer, - * if both are valid. - */ - if (!doneleft && !doneright) - useleft = - pagino - - (trec.ir_startino + - XFS_INODES_PER_CHUNK - 1) < - rec.ir_startino - pagino; - else - useleft = !doneleft; - /* - * If checking the left, does it have - * free inodes? - */ - if (useleft && trec.ir_freecount) { - /* - * Yes, set it up as the chunk to use. - */ - rec = trec; - xfs_btree_del_cursor(cur, - XFS_BTREE_NOERROR); - cur = tcur; - break; - } - /* - * If checking the right, does it have - * free inodes? - */ - if (!useleft && rec.ir_freecount) { - /* - * Yes, it's already set up. - */ - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - break; - } - /* - * If used the left, get another one - * further left. - */ - if (useleft) { - if ((error = xfs_btree_decrement(tcur, 0, - &i))) - goto error1; - doneleft = !i; - if (!doneleft) { - if ((error = xfs_inobt_get_rec( - tcur, - &trec.ir_startino, - &trec.ir_freecount, - &trec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, - error1); - } - } - /* - * If used the right, get another one - * further right. + * Not in range - save last search + * location and allocate a new inode */ - else { - if ((error = xfs_btree_increment(cur, 0, - &i))) - goto error1; - doneright = !i; - if (!doneright) { - if ((error = xfs_inobt_get_rec( - cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error1; - XFS_WANT_CORRUPTED_GOTO(i == 1, - error1); - } - } + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto newino; + } + + /* figure out the closer block if both are valid. */ + if (!doneleft && !doneright) { + useleft = pagino - + (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < + rec.ir_startino - pagino; + } else { + useleft = !doneleft; } - ASSERT(!doneleft || !doneright); + + /* free inodes to the left? */ + if (useleft && trec.ir_freecount) { + rec = trec; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + cur = tcur; + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* free inodes to the right? */ + if (!useleft && rec.ir_freecount) { + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + pag->pagl_leftrec = trec.ir_startino; + pag->pagl_rightrec = rec.ir_startino; + pag->pagl_pagino = pagino; + goto alloc_inode; + } + + /* get next record to check */ + if (useleft) { + error = xfs_ialloc_next_rec(tcur, &trec, + &doneleft, 1); + } else { + error = xfs_ialloc_next_rec(cur, &rec, + &doneright, 0); + } + if (error) + goto error1; } + + /* + * We've reached the end of the btree. because + * we are only searching a small chunk of the + * btree each search, there is obviously free + * inodes closer to the parent inode than we + * are now. restart the search again. + */ + pag->pagl_pagino = NULLAGINO; + pag->pagl_leftrec = NULLAGINO; + pag->pagl_rightrec = NULLAGINO; + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + goto restart_pagno; } + /* - * In a different a.g. from the parent. + * In a different AG from the parent. * See if the most recently allocated block has any free. */ - else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { - if ((error = xfs_inobt_lookup_eq(cur, - be32_to_cpu(agi->agi_newino), 0, 0, &i))) +newino: + if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { + error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), + XFS_LOOKUP_EQ, &i); + if (error) goto error0; - if (i == 1 && - (error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &j)) == 0 && - j == 1 && - rec.ir_freecount > 0) { - /* - * The last chunk allocated in the group still has - * a free inode. - */ - } - /* - * None left in the last group, search the whole a.g. - */ - else { + + if (i == 1) { + error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - ASSERT(i == 1); - for (;;) { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, - &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (rec.ir_freecount > 0) - break; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + if (j == 1 && rec.ir_freecount > 0) { + /* + * The last chunk allocated in the group + * still has a free inode. + */ + goto alloc_inode; } } } + + /* + * None left in the last group, search the whole AG + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + for (;;) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (rec.ir_freecount > 0) + break; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + +alloc_inode: offset = xfs_ialloc_find_free(&rec.ir_free); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); @@ -945,33 +1000,19 @@ nextag: ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; - if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, - rec.ir_free))) + error = xfs_inobt_update(cur, &rec); + if (error) goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[tagno].pagi_freecount--; up_read(&mp->m_peraglock); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); *inop = ino; @@ -1062,38 +1103,23 @@ xfs_difree( * Initialize the cursor. */ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, - &rec.ir_freecount, &rec.ir_free, &i))) - goto error0; - if (i) { - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; + /* * Look for the entry describing this inode. */ - if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { + if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", + "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", error, mp->m_fsname); goto error0; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, - &rec.ir_free, &i))) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", error, mp->m_fsname); @@ -1148,12 +1174,14 @@ xfs_difree( } else { *delete = 0; - if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { + error = xfs_inobt_update(cur, &rec); + if (error) { cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", + "xfs_difree: xfs_inobt_update returned an error %d on %s.", error, mp->m_fsname); goto error0; } + /* * Change the inode free counts and log the ag/sb changes. */ @@ -1165,28 +1193,10 @@ xfs_difree( xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } -#ifdef DEBUG - if (cur->bc_nlevels == 1) { - int freecount = 0; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error0; - if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) - goto error0; - do { - if ((error = xfs_inobt_get_rec(cur, - &rec.ir_startino, - &rec.ir_freecount, - &rec.ir_free, &i))) - goto error0; - if (i) { - freecount += rec.ir_freecount; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto error0; - } - } while (i == 1); - ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } -#endif xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; @@ -1297,9 +1307,7 @@ xfs_imap( chunk_agbno = agbno - offset_agbno; } else { xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_agino_t chunk_agino; /* first agino in inode chunk */ - __int32_t chunk_cnt; /* count of free inodes in chunk */ - xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ + xfs_inobt_rec_incore_t chunk_rec; xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ @@ -1315,15 +1323,14 @@ xfs_imap( } cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_lookup_le() failed"); + "xfs_inobt_lookup() failed"); goto error0; } - error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, - &chunk_free, &i); + error = xfs_inobt_get_rec(cur, &chunk_rec, &i); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); @@ -1341,7 +1348,7 @@ xfs_imap( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); + chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); offset_agbno = agbno - chunk_agbno; } diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index aeee8278f92..bb5385475e1 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -150,23 +150,15 @@ xfs_ialloc_pagi_init( xfs_agnumber_t agno); /* allocation group number */ /* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. + * Lookup a record by ino in the btree given by cur. */ -int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); +int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, + xfs_lookup_t dir, int *stat); /* * Get the data from the pointed-to record. */ -extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, - __int32_t *fcnt, xfs_inofree_t *free, int *stat); +extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, + xfs_inobt_rec_incore_t *rec, int *stat); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index ecbf8b4d2e2..80e526489be 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -82,7 +82,6 @@ xfs_inode_alloc( memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_update_core = 0; - ip->i_update_size = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; @@ -456,32 +455,6 @@ out_error_or_again: return error; } - -/* - * Look for the inode corresponding to the given ino in the hash table. - * If it is there and its i_transp pointer matches tp, return it. - * Otherwise, return NULL. - */ -xfs_inode_t * -xfs_inode_incore(xfs_mount_t *mp, - xfs_ino_t ino, - xfs_trans_t *tp) -{ - xfs_inode_t *ip; - xfs_perag_t *pag; - - pag = xfs_get_perag(mp, ino); - read_lock(&pag->pag_ici_lock); - ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); - - /* the returned inode must match the transaction */ - if (ip && (ip->i_transp != tp)) - return NULL; - return ip; -} - /* * Decrement reference count of an inode structure and unlock it. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index da428b3fe0f..c1dc7ef5a1d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -651,7 +651,7 @@ xfs_iformat_btree( return 0; } -void +STATIC void xfs_dinode_from_disk( xfs_icdinode_t *to, xfs_dinode_t *from) @@ -1247,7 +1247,7 @@ xfs_isize_check( * In that case the pages will still be in memory, but the inode size * will never have been updated. */ -xfs_fsize_t +STATIC xfs_fsize_t xfs_file_last_byte( xfs_inode_t *ip) { @@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct( /* * Resize an extent indirection array to new_size bytes. */ -void +STATIC void xfs_iext_realloc_indirect( xfs_ifork_t *ifp, /* inode fork pointer */ int new_size) /* new indirection array size */ @@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect( /* * Switch from indirection array to linear (direct) extent allocations. */ -void +STATIC void xfs_iext_indirect_to_direct( xfs_ifork_t *ifp) /* inode fork pointer */ { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65f24a3cc99..0b38b9a869e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -261,7 +261,6 @@ typedef struct xfs_inode { /* Miscellaneous state. */ unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ - unsigned char i_update_size; /* di_size field is dirty */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ @@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * xfs_iget.c prototypes. */ -xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, - struct xfs_trans *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, uint, uint, xfs_inode_t **, xfs_daddr_t); void xfs_iput(xfs_inode_t *, uint); @@ -504,7 +501,6 @@ void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); void xfs_ichgtime(xfs_inode_t *, int); -xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); @@ -572,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_buf **, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, xfs_daddr_t, uint); -void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); @@ -592,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); void xfs_iext_realloc_direct(xfs_ifork_t *, int); -void xfs_iext_realloc_indirect(xfs_ifork_t *, int); -void xfs_iext_indirect_to_direct(xfs_ifork_t *); void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_inline_to_direct(xfs_ifork_t *, int); void xfs_iext_destroy(xfs_ifork_t *); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 977c4aec587..47d5b663c37 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -263,14 +263,6 @@ xfs_inode_item_format( } /* - * We don't have to worry about re-ordering here because - * the update_size field is protected by the inode lock - * and we have that held in exclusive mode. - */ - if (ip->i_update_size) - ip->i_update_size = 0; - - /* * Make sure to get the latest atime from the Linux inode. */ xfs_synchronize_atime(ip); @@ -712,8 +704,6 @@ xfs_inode_item_unlock( * Clear out the fields of the inode log item particular * to the current transaction. */ - iip->ili_ilock_recur = 0; - iip->ili_iolock_recur = 0; iip->ili_flags = 0; /* diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index a52ac125f05..65bae4c9b8b 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item { struct xfs_inode *ili_inode; /* inode ptr */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ - unsigned short ili_ilock_recur; /* lock recursion count */ - unsigned short ili_iolock_recur; /* lock recursion count */ unsigned short ili_flags; /* misc flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index 7a28191cb0d..b8e4ee4e89a 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h @@ -72,7 +72,6 @@ struct xfs_mount; #if XFS_BIG_INUMS #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) -#define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32)) #else #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) #endif diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index aeb2d2221c7..b68f9107e26 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -39,7 +39,7 @@ #include "xfs_error.h" #include "xfs_btree.h" -int +STATIC int xfs_internal_inum( xfs_mount_t *mp, xfs_ino_t ino) @@ -353,9 +353,6 @@ xfs_bulkstat( int end_of_ag; /* set if we've seen the ag end */ int error; /* error code */ int fmterror;/* bulkstat formatter result */ - __int32_t gcnt; /* current btree rec's count */ - xfs_inofree_t gfree; /* current btree rec's free mask */ - xfs_agino_t gino; /* current btree rec's start inode */ int i; /* loop index */ int icount; /* count of inodes good in irbuf */ size_t irbsize; /* size of irec buffer in bytes */ @@ -442,40 +439,43 @@ xfs_bulkstat( * we need to get the remainder of the chunk we're in. */ if (agino > 0) { + xfs_inobt_rec_incore_t r; + /* * Lookup the inode chunk that this inode lives in. */ - error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, + &tmp); if (!error && /* no I/O error */ tmp && /* lookup succeeded */ /* got the record, should always work */ - !(error = xfs_inobt_get_rec(cur, &gino, &gcnt, - &gfree, &i)) && + !(error = xfs_inobt_get_rec(cur, &r, &i)) && i == 1 && /* this is the right chunk */ - agino < gino + XFS_INODES_PER_CHUNK && + agino < r.ir_startino + XFS_INODES_PER_CHUNK && /* lastino was not last in chunk */ - (chunkidx = agino - gino + 1) < + (chunkidx = agino - r.ir_startino + 1) < XFS_INODES_PER_CHUNK && /* there are some left allocated */ xfs_inobt_maskn(chunkidx, - XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) { + XFS_INODES_PER_CHUNK - chunkidx) & + ~r.ir_free) { /* * Grab the chunk record. Mark all the * uninteresting inodes (because they're * before our start point) free. */ for (i = 0; i < chunkidx; i++) { - if (XFS_INOBT_MASK(i) & ~gfree) - gcnt++; + if (XFS_INOBT_MASK(i) & ~r.ir_free) + r.ir_freecount++; } - gfree |= xfs_inobt_maskn(0, chunkidx); - irbp->ir_startino = gino; - irbp->ir_freecount = gcnt; - irbp->ir_free = gfree; + r.ir_free |= xfs_inobt_maskn(0, chunkidx); + irbp->ir_startino = r.ir_startino; + irbp->ir_freecount = r.ir_freecount; + irbp->ir_free = r.ir_free; irbp++; - agino = gino + XFS_INODES_PER_CHUNK; - icount = XFS_INODES_PER_CHUNK - gcnt; + agino = r.ir_startino + XFS_INODES_PER_CHUNK; + icount = XFS_INODES_PER_CHUNK - r.ir_freecount; } else { /* * If any of those tests failed, bump the @@ -493,7 +493,7 @@ xfs_bulkstat( /* * Start of ag. Lookup the first inode chunk. */ - error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); icount = 0; } /* @@ -501,6 +501,8 @@ xfs_bulkstat( * until we run out of inodes or space in the buffer. */ while (irbp < irbufend && icount < ubcount) { + xfs_inobt_rec_incore_t r; + /* * Loop as long as we're unable to read the * inode btree. @@ -510,51 +512,55 @@ xfs_bulkstat( if (XFS_AGINO_TO_AGBNO(mp, agino) >= be32_to_cpu(agi->agi_length)) break; - error = xfs_inobt_lookup_ge(cur, agino, 0, 0, - &tmp); + error = xfs_inobt_lookup(cur, agino, + XFS_LOOKUP_GE, &tmp); cond_resched(); } /* * If ran off the end of the ag either with an error, * or the normal way, set end and stop collecting. */ - if (error || - (error = xfs_inobt_get_rec(cur, &gino, &gcnt, - &gfree, &i)) || - i == 0) { + if (error) { end_of_ag = 1; break; } + + error = xfs_inobt_get_rec(cur, &r, &i); + if (error || i == 0) { + end_of_ag = 1; + break; + } + /* * If this chunk has any allocated inodes, save it. * Also start read-ahead now for this chunk. */ - if (gcnt < XFS_INODES_PER_CHUNK) { + if (r.ir_freecount < XFS_INODES_PER_CHUNK) { /* * Loop over all clusters in the next chunk. * Do a readahead if there are any allocated * inodes in that cluster. */ - for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), - chunkidx = 0; + agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; chunkidx += nicluster, agbno += nbcluster) { - if (xfs_inobt_maskn(chunkidx, - nicluster) & ~gfree) + if (xfs_inobt_maskn(chunkidx, nicluster) + & ~r.ir_free) xfs_btree_reada_bufs(mp, agno, agbno, nbcluster); } - irbp->ir_startino = gino; - irbp->ir_freecount = gcnt; - irbp->ir_free = gfree; + irbp->ir_startino = r.ir_startino; + irbp->ir_freecount = r.ir_freecount; + irbp->ir_free = r.ir_free; irbp++; - icount += XFS_INODES_PER_CHUNK - gcnt; + icount += XFS_INODES_PER_CHUNK - r.ir_freecount; } /* * Set agino to after this chunk and bump the cursor. */ - agino = gino + XFS_INODES_PER_CHUNK; + agino = r.ir_startino + XFS_INODES_PER_CHUNK; error = xfs_btree_increment(cur, 0, &tmp); cond_resched(); } @@ -820,9 +826,7 @@ xfs_inumbers( int bufidx; xfs_btree_cur_t *cur; int error; - __int32_t gcnt; - xfs_inofree_t gfree; - xfs_agino_t gino; + xfs_inobt_rec_incore_t r; int i; xfs_ino_t ino; int left; @@ -855,7 +859,8 @@ xfs_inumbers( continue; } cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); - error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, + &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); cur = NULL; @@ -870,9 +875,8 @@ xfs_inumbers( continue; } } - if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, - &i)) || - i == 0) { + error = xfs_inobt_get_rec(cur, &r, &i); + if (error || i == 0) { xfs_buf_relse(agbp); agbp = NULL; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); @@ -881,10 +885,12 @@ xfs_inumbers( agino = 0; continue; } - agino = gino + XFS_INODES_PER_CHUNK - 1; - buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); - buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; - buffer[bufidx].xi_allocmask = ~gfree; + agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; + buffer[bufidx].xi_startino = + XFS_AGINO_TO_INO(mp, agno, r.ir_startino); + buffer[bufidx].xi_alloccount = + XFS_INODES_PER_CHUNK - r.ir_freecount; + buffer[bufidx].xi_allocmask = ~r.ir_free; bufidx++; left--; if (bufidx == bcount) { diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1fb04e7deb6..20792bf4594 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -99,11 +99,6 @@ xfs_bulkstat_one( void *dibuff, int *stat); -int -xfs_internal_inum( - xfs_mount_t *mp, - xfs_ino_t ino); - typedef int (*inumbers_fmt_pf)( void __user *ubuffer, /* buffer to write to */ const xfs_inogrp_t *buffer, /* buffer to read from */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index bcad5f4c1fd..679c7c4926a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log, extern int xlog_recover(xlog_t *log); extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); -extern void xlog_recover_process_iunlinks(xlog_t *log); - extern struct xfs_buf *xlog_get_bp(xlog_t *, int); extern void xlog_put_bp(struct xfs_buf *); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 47da2fb4537..1099395d7d6 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink( * freeing of the inode and its removal from the list must be * atomic. */ -void +STATIC void xlog_recover_process_iunlinks( xlog_t *log) { diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5c6f092659c..8b6c9e807ef 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * * The m_sb_lock must be held when this routine is called. */ -int +STATIC int xfs_mod_incore_sb_unlocked( xfs_mount_t *mp, xfs_sb_field_t field, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a5122382afd..a6c023bc0fb 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -414,13 +414,10 @@ typedef struct xfs_mod_sb { extern int xfs_log_sbcount(xfs_mount_t *, uint); extern int xfs_mountfs(xfs_mount_t *mp); -extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); -extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, - int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index afee7eb2432..4b0613d99fa 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -564,35 +564,6 @@ xfs_mru_cache_lookup( } /* - * To look up an element using its key, but leave its location in the internal - * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this - * function returns NULL. - * - * See the comments above the declaration of the xfs_mru_cache_lookup() function - * for important locking information pertaining to this call. - */ -void * -xfs_mru_cache_peek( - xfs_mru_cache_t *mru, - unsigned long key) -{ - xfs_mru_cache_elem_t *elem; - - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - return NULL; - - spin_lock(&mru->lock); - elem = radix_tree_lookup(&mru->store, key); - if (!elem) - spin_unlock(&mru->lock); - else - __release(mru_lock); /* help sparse not be stupid */ - - return elem ? elem->value : NULL; -} - -/* * To release the internal data structure spinlock after having performed an * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() * with the data store pointer. diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index dd58ea1bbeb..5d439f34b0c 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); -void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_done(struct xfs_mru_cache *mru); #endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index fea68615ed2..3f816ad7ff1 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -88,90 +88,6 @@ xfs_write_clear_setuid( } /* - * Handle logging requirements of various synchronous types of write. - */ -int -xfs_write_sync_logforce( - xfs_mount_t *mp, - xfs_inode_t *ip) -{ - int error = 0; - - /* - * If we're treating this as O_DSYNC and we have not updated the - * size, force the log. - */ - if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && - !(ip->i_update_size)) { - xfs_inode_log_item_t *iip = ip->i_itemp; - - /* - * If an allocation transaction occurred - * without extending the size, then we have to force - * the log up the proper point to ensure that the - * allocation is permanent. We can't count on - * the fact that buffered writes lock out direct I/O - * writes - the direct I/O write could have extended - * the size nontransactionally, then finished before - * we started. xfs_write_file will think that the file - * didn't grow but the update isn't safe unless the - * size change is logged. - * - * Force the log if we've committed a transaction - * against the inode or if someone else has and - * the commit record hasn't gone to disk (e.g. - * the inode is pinned). This guarantees that - * all changes affecting the inode are permanent - * when we return. - */ - if (iip && iip->ili_last_lsn) { - error = _xfs_log_force(mp, iip->ili_last_lsn, - XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); - } else if (xfs_ipincount(ip) > 0) { - error = _xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); - } - - } else { - xfs_trans_t *tp; - - /* - * O_SYNC or O_DSYNC _with_ a size update are handled - * the same way. - * - * If the write was synchronous then we need to make - * sure that the inode modification time is permanent. - * We'll have updated the timestamp above, so here - * we use a synchronous transaction to log the inode. - * It's not fast, but it's necessary. - * - * If this a dsync write and the size got changed - * non-transactionally, then we need to ensure that - * the size change gets logged in a synchronous - * transaction. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); - if ((error = xfs_trans_reserve(tp, 0, - XFS_SWRITE_LOG_RES(mp), - 0, 0, 0))) { - /* Transaction reserve failed */ - xfs_trans_cancel(tp, 0); - } else { - /* Transaction reserve successful */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - } - - return error; -} - -/* * Force a shutdown of the filesystem instantly while keeping * the filesystem consistent. We don't do an unmount here; just shutdown * the shop, make sure that absolutely nothing persistent happens to diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f76c003ec55..f5e4874c37d 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -68,7 +68,6 @@ xfs_get_extsz_hint( * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip); extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bioerror(struct xfs_buf *bp); extern int xfs_bioerror_relse(struct xfs_buf *bp); @@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, xfs_buf_t *bp, xfs_daddr_t blkno); -/* - * Prototypes for functions in xfs_vnodeops.c. - */ -extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, - int flags); - #endif /* __XFS_RW_H__ */ diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 775249a54f6..ed47fc77759 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -68,7 +68,7 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFS 14 #define XFS_TRANS_STRAT_WRITE 15 #define XFS_TRANS_DIOSTRAT 16 -#define XFS_TRANS_WRITE_SYNC 17 +/* 17 was XFS_TRANS_WRITE_SYNC */ #define XFS_TRANS_WRITEID 18 #define XFS_TRANS_ADDAFORK 19 #define XFS_TRANS_ATTRINVAL 20 diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee2f8c8b0a..218829e6a15 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -307,7 +307,7 @@ xfs_trans_read_buf( return (flags & XFS_BUF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); - if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { + if (XFS_BUF_GETERROR(bp) != 0) { xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); error = XFS_BUF_GETERROR(bp); @@ -315,7 +315,7 @@ xfs_trans_read_buf( return error; } #ifdef DEBUG - if (xfs_do_error && (bp != NULL)) { + if (xfs_do_error) { if (xfs_error_target == target) { if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 23d276af2e0..785ff101da0 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug( /* - * Get and lock the inode for the caller if it is not already - * locked within the given transaction. If it is already locked - * within the transaction, just increment its lock recursion count - * and return a pointer to it. - * - * For an inode to be locked in a transaction, the inode lock, as - * opposed to the io lock, must be taken exclusively. This ensures - * that the inode can be involved in only 1 transaction at a time. - * Lock recursion is handled on the io lock, but only for lock modes - * of equal or lesser strength. That is, you can recur on the io lock - * held EXCL with a SHARED request but not vice versa. Also, if - * the inode is already a part of the transaction then you cannot - * go from not holding the io lock to having it EXCL or SHARED. - * - * Use the inode cache routine xfs_inode_incore() to find the inode - * if it is already owned by this transaction. - * - * If we don't already own the inode, use xfs_iget() to get it. - * Since the inode log item structure is embedded in the incore - * inode structure and is initialized when the inode is brought - * into memory, there is nothing to do with it here. - * - * If the given transaction pointer is NULL, just call xfs_iget(). - * This simplifies code which must handle both cases. + * Get an inode and join it to the transaction. */ int xfs_trans_iget( @@ -84,62 +61,11 @@ xfs_trans_iget( xfs_inode_t **ipp) { int error; - xfs_inode_t *ip; - - /* - * If the transaction pointer is NULL, just call the normal - * xfs_iget(). - */ - if (tp == NULL) - return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0); - - /* - * If we find the inode in core with this transaction - * pointer in its i_transp field, then we know we already - * have it locked. In this case we just increment the lock - * recursion count and return the inode to the caller. - * Assert that the inode is already locked in the mode requested - * by the caller. We cannot do lock promotions yet, so - * die if someone gets this wrong. - */ - if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) { - /* - * Make sure that the inode lock is held EXCL and - * that the io lock is never upgraded when the inode - * is already a part of the transaction. - */ - ASSERT(ip->i_itemp != NULL); - ASSERT(lock_flags & XFS_ILOCK_EXCL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || - (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); - ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || - xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); - ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || - (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); - - if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { - ip->i_itemp->ili_iolock_recur++; - } - if (lock_flags & XFS_ILOCK_EXCL) { - ip->i_itemp->ili_ilock_recur++; - } - *ipp = ip; - return 0; - } - - ASSERT(lock_flags & XFS_ILOCK_EXCL); - error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0); - if (error) { - return error; - } - ASSERT(ip != NULL); - xfs_trans_ijoin(tp, ip, lock_flags); - *ipp = ip; - return 0; + error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); + if (!error && tp) + xfs_trans_ijoin(tp, *ipp, lock_flags); + return error; } /* @@ -163,8 +89,6 @@ xfs_trans_ijoin( xfs_inode_item_init(ip, ip->i_mount); iip = ip->i_itemp; ASSERT(iip->ili_flags == 0); - ASSERT(iip->ili_ilock_recur == 0); - ASSERT(iip->ili_iolock_recur == 0); /* * Get a log_item_desc to point at the new item. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 492d75bae2b..a434f287962 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -611,7 +611,7 @@ xfs_fsync( xfs_inode_t *ip) { xfs_trans_t *tp; - int error; + int error = 0; int log_flushed = 0, changed = 1; xfs_itrace_entry(ip); @@ -619,14 +619,9 @@ xfs_fsync( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - /* capture size updates in I/O completion before writing the inode. */ - error = xfs_wait_on_pages(ip, 0, -1); - if (error) - return XFS_ERROR(error); - /* * We always need to make sure that the required inode state is safe on - * disk. The vnode might be clean but we still might need to force the + * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue @@ -638,7 +633,7 @@ xfs_fsync( */ xfs_ilock(ip, XFS_ILOCK_SHARED); - if (!(ip->i_update_size || ip->i_update_core)) { + if (!ip->i_update_core) { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got @@ -718,7 +713,7 @@ xfs_fsync( * when the link count isn't zero and by xfs_dm_punch_hole() when * punching a hole to EOF. */ -int +STATIC int xfs_free_eofblocks( xfs_mount_t *mp, xfs_inode_t *ip, @@ -1476,8 +1471,8 @@ xfs_create( if (error == ENOSPC) { /* flush outstanding delalloc blocks and retry */ xfs_flush_inodes(dp); - error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + error = xfs_trans_reserve(tp, resblks, log_res, 0, + XFS_TRANS_PERM_LOG_RES, log_count); } if (error == ENOSPC) { /* No space at all so try a "no-allocation" reservation */ |