From 9ff05123e3bfbb1d2b68ba1d9bf1f7d1dffc1453 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:37 -0700 Subject: nilfs2: segment constructor This adds the segment constructor (also called log writer). The segment constructor collects dirty buffers for every dirty inode, makes summaries of the buffers, assigns disk block addresses to the buffers, and then submits BIOs for the buffers. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 3187 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3187 insertions(+) create mode 100644 fs/nilfs2/segment.c (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c new file mode 100644 index 00000000000..2c4c088059f --- /dev/null +++ b/fs/nilfs2/segment.c @@ -0,0 +1,3187 @@ +/* + * segment.c - NILFS segment constructor. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nilfs.h" +#include "btnode.h" +#include "page.h" +#include "segment.h" +#include "sufile.h" +#include "cpfile.h" +#include "ifile.h" +#include "seglist.h" +#include "segbuf.h" + + +/* + * Segment constructor + */ +#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */ + +#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments + appended in collection retry loop */ + +/* Construction mode */ +enum { + SC_LSEG_SR = 1, /* Make a logical segment having a super root */ + SC_LSEG_DSYNC, /* Flush data blocks of a given file and make + a logical segment without a super root */ + SC_FLUSH_FILE, /* Flush data files, leads to segment writes without + creating a checkpoint */ + SC_FLUSH_DAT, /* Flush DAT file. 
This also creates segments without + a checkpoint */ +}; + +/* Stage numbers of dirty block collection */ +enum { + NILFS_ST_INIT = 0, + NILFS_ST_GC, /* Collecting dirty blocks for GC */ + NILFS_ST_FILE, + NILFS_ST_SKETCH, + NILFS_ST_IFILE, + NILFS_ST_CPFILE, + NILFS_ST_SUFILE, + NILFS_ST_DAT, + NILFS_ST_SR, /* Super root */ + NILFS_ST_DSYNC, /* Data sync blocks */ + NILFS_ST_DONE, +}; + +/* State flags of collection */ +#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ +#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ +#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) + +/* Operations depending on the construction mode and file type */ +struct nilfs_sc_operations { + int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + void (*write_data_binfo)(struct nilfs_sc_info *, + struct nilfs_segsum_pointer *, + union nilfs_binfo *); + void (*write_node_binfo)(struct nilfs_sc_info *, + struct nilfs_segsum_pointer *, + union nilfs_binfo *); +}; + +/* + * Other definitions + */ +static void nilfs_segctor_start_timer(struct nilfs_sc_info *); +static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); +static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); +static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, + int); + +#define nilfs_cnt32_gt(a, b) \ + (typecheck(__u32, a) && typecheck(__u32, b) && \ + ((__s32)(b) - (__s32)(a) < 0)) +#define nilfs_cnt32_ge(a, b) \ + (typecheck(__u32, a) && typecheck(__u32, b) && \ + ((__s32)(a) - (__s32)(b) >= 0)) +#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) +#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) + +/* + * Transaction + */ +static struct kmem_cache *nilfs_transaction_cachep; + +/** + * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info + * + * nilfs_init_transaction_cache() creates a slab cache for the struct + * nilfs_transaction_info. + * + * Return Value: On success, it returns 0. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_init_transaction_cache(void) +{ + nilfs_transaction_cachep = + kmem_cache_create("nilfs2_transaction_cache", + sizeof(struct nilfs_transaction_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0; +} + +/** + * nilfs_detroy_transaction_cache - destroy the cache for transaction info + * + * nilfs_destroy_transaction_cache() frees the slab cache for the struct + * nilfs_transaction_info. + */ +void nilfs_destroy_transaction_cache(void) +{ + kmem_cache_destroy(nilfs_transaction_cachep); +} + +static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) +{ + struct nilfs_transaction_info *cur_ti = current->journal_info; + void *save = NULL; + + if (cur_ti) { + if (cur_ti->ti_magic == NILFS_TI_MAGIC) + return ++cur_ti->ti_count; + else { + /* + * If journal_info field is occupied by other FS, + * we save it and restore on nilfs_transaction_end(). + * But this should never happen. 
+ */ + printk(KERN_WARNING + "NILFS warning: journal info from a different " + "FS\n"); + save = current->journal_info; + } + } + if (!ti) { + ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS); + if (!ti) + return -ENOMEM; + ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC; + } else { + ti->ti_flags = 0; + } + ti->ti_count = 0; + ti->ti_save = save; + ti->ti_magic = NILFS_TI_MAGIC; + current->journal_info = ti; + return 0; +} + +/** + * nilfs_transaction_begin - start indivisible file operations. + * @sb: super block + * @ti: nilfs_transaction_info + * @vacancy_check: flags for vacancy rate checks + * + * nilfs_transaction_begin() acquires a reader/writer semaphore, called + * the segment semaphore, to make a segment construction and write tasks + * exclusive. The function is used with nilfs_transaction_end() in pairs. + * The region enclosed by these two functions can be nested. To avoid a + * deadlock, the semaphore is only acquired or released in the outermost call. + * + * This function allocates a nilfs_transaction_info struct to keep context + * information on it. It is initialized and hooked onto the current task in + * the outermost call. If a pre-allocated struct is given to @ti, it is used + * instead; othewise a new struct is assigned from a slab. + * + * When @vacancy_check flag is set, this function will check the amount of + * free space, and will wait for the GC to reclaim disk space if low capacity. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + * + * %-ERESTARTSYS - Interrupted + * + * %-ENOSPC - No space left on device + */ +int nilfs_transaction_begin(struct super_block *sb, + struct nilfs_transaction_info *ti, + int vacancy_check) +{ + struct nilfs_sb_info *sbi; + struct the_nilfs *nilfs; + int ret = nilfs_prepare_segment_lock(ti); + + if (unlikely(ret < 0)) + return ret; + if (ret > 0) + return 0; + + sbi = NILFS_SB(sb); + nilfs = sbi->s_nilfs; + down_read(&nilfs->ns_segctor_sem); + if (vacancy_check && nilfs_near_disk_full(nilfs)) { + up_read(&nilfs->ns_segctor_sem); + ret = -ENOSPC; + goto failed; + } + return 0; + + failed: + ti = current->journal_info; + current->journal_info = ti->ti_save; + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); + return ret; +} + +/** + * nilfs_transaction_end - end indivisible file operations. + * @sb: super block + * @commit: commit flag (0 for no change) + * + * nilfs_transaction_end() releases the read semaphore which is + * acquired by nilfs_transaction_begin(). Its releasing is only done + * in outermost call of this function. If the nilfs_transaction_info + * was allocated dynamically, it is given back to a slab cache. 
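+ *
+ * Return Value: On success, 0 is returned.  When the %NILFS_TI_SYNC flag
+ * is set, an error code returned by nilfs_construct_segment() is passed
+ * on if the synchronous construction fails.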
+ */ +int nilfs_transaction_end(struct super_block *sb, int commit) +{ + struct nilfs_transaction_info *ti = current->journal_info; + struct nilfs_sb_info *sbi; + struct nilfs_sc_info *sci; + int err = 0; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + + if (commit) + ti->ti_flags |= NILFS_TI_COMMIT; + if (ti->ti_count > 0) { + ti->ti_count--; + return 0; + } + sbi = NILFS_SB(sb); + sci = NILFS_SC(sbi); + if (sci != NULL) { + if (ti->ti_flags & NILFS_TI_COMMIT) + nilfs_segctor_start_timer(sci); + if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > + sci->sc_watermark) + nilfs_segctor_do_flush(sci, 0); + } + up_read(&sbi->s_nilfs->ns_segctor_sem); + current->journal_info = ti->ti_save; + + if (ti->ti_flags & NILFS_TI_SYNC) + err = nilfs_construct_segment(sb); + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); + return err; +} + +void nilfs_relax_pressure_in_lock(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sbi->s_nilfs; + + if (!sci || !sci->sc_flush_request) + return; + + set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); + up_read(&nilfs->ns_segctor_sem); + + down_write(&nilfs->ns_segctor_sem); + if (sci->sc_flush_request && + test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) { + struct nilfs_transaction_info *ti = current->journal_info; + + ti->ti_flags |= NILFS_TI_WRITER; + nilfs_segctor_do_immediate_flush(sci); + ti->ti_flags &= ~NILFS_TI_WRITER; + } + downgrade_write(&nilfs->ns_segctor_sem); +} + +static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, + struct nilfs_transaction_info *ti, + int gcflag) +{ + struct nilfs_transaction_info *cur_ti = current->journal_info; + + BUG_ON(cur_ti); + BUG_ON(!ti); + ti->ti_flags = NILFS_TI_WRITER; + ti->ti_count = 0; + ti->ti_save = cur_ti; + ti->ti_magic = NILFS_TI_MAGIC; + INIT_LIST_HEAD(&ti->ti_garbage); + current->journal_info = ti; + + for (;;) { + down_write(&sbi->s_nilfs->ns_segctor_sem); + if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) + break; + + nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); + + up_write(&sbi->s_nilfs->ns_segctor_sem); + yield(); + } + if (gcflag) + ti->ti_flags |= NILFS_TI_GC; +} + +static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + BUG_ON(ti->ti_count > 0); + + up_write(&sbi->s_nilfs->ns_segctor_sem); + current->journal_info = ti->ti_save; + if (!list_empty(&ti->ti_garbage)) + nilfs_dispose_list(sbi, &ti->ti_garbage, 0); +} + +static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + unsigned bytes) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + unsigned blocksize = sci->sc_super->s_blocksize; + void *p; + + if (unlikely(ssp->offset + bytes > blocksize)) { + ssp->offset = 0; + BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh, + &segbuf->sb_segsum_buffers)); + ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh); + } + p = ssp->bh->b_data + ssp->offset; + ssp->offset += bytes; + return p; +} + +/** + * nilfs_segctor_reset_segment_buffer - reset the current segment buffer + * @sci: nilfs_sc_info + */ +static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + struct buffer_head *sumbh; + unsigned sumbytes; + unsigned flags = 0; + int err; + + if (nilfs_doing_gc()) + flags = NILFS_SS_GC; + err = 
nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime); + if (unlikely(err)) + return err; + + sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); + sumbytes = segbuf->sb_sum.sumbytes; + sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes; + sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes; + sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; + return 0; +} + +static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) +{ + sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; + if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs)) + return -E2BIG; /* The current segment is filled up + (internal code) */ + sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); + return nilfs_segctor_reset_segment_buffer(sci); +} + +static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + int err; + + if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) { + err = nilfs_segctor_feed_segment(sci); + if (err) + return err; + segbuf = sci->sc_curseg; + } + err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root); + if (likely(!err)) + segbuf->sb_sum.flags |= NILFS_SS_SR; + return err; +} + +/* + * Functions for making segment summary and payloads + */ +static int nilfs_segctor_segsum_block_required( + struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, + unsigned binfo_size) +{ + unsigned blocksize = sci->sc_super->s_blocksize; + /* Size of finfo and binfo is enough small against blocksize */ + + return ssp->offset + binfo_size + + (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) > + blocksize; +} + +static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, + struct inode *inode) +{ + sci->sc_curseg->sb_sum.nfinfo++; + sci->sc_binfo_ptr = sci->sc_finfo_ptr; + nilfs_segctor_map_segsum_entry( + sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); + /* skip finfo */ +} + +static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, + struct inode *inode) +{ + struct nilfs_finfo *finfo; + struct nilfs_inode_info *ii; + struct nilfs_segment_buffer *segbuf; + + if (sci->sc_blk_cnt == 0) + return; + + ii = NILFS_I(inode); + finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, + sizeof(*finfo)); + finfo->fi_ino = cpu_to_le64(inode->i_ino); + finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); + finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); + finfo->fi_cno = cpu_to_le64(ii->i_cno); + + segbuf = sci->sc_curseg; + segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + + sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1); + sci->sc_finfo_ptr = sci->sc_binfo_ptr; + sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; +} + +static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode, + unsigned binfo_size) +{ + struct nilfs_segment_buffer *segbuf; + int required, err = 0; + + retry: + segbuf = sci->sc_curseg; + required = nilfs_segctor_segsum_block_required( + sci, &sci->sc_binfo_ptr, binfo_size); + if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) { + nilfs_segctor_end_finfo(sci, inode); + err = nilfs_segctor_feed_segment(sci); + if (err) + return err; + goto retry; + } + if (unlikely(required)) { + err = nilfs_segbuf_extend_segsum(segbuf); + if (unlikely(err)) + goto failed; + } + if (sci->sc_blk_cnt == 0) + nilfs_segctor_begin_finfo(sci, inode); + + nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size); + /* Substitution to vblocknr is delayed until update_blocknr() */ + 
nilfs_segbuf_add_file_buffer(segbuf, bh); + sci->sc_blk_cnt++; + failed: + return err; +} + +static int nilfs_handle_bmap_error(int err, const char *fname, + struct inode *inode, struct super_block *sb) +{ + if (err == -EINVAL) { + nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", + inode->i_ino); + err = -EIO; + } + return err; +} + +/* + * Callback functions that enumerate, mark, and collect dirty blocks + */ +static int nilfs_collect_file_data(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + int err; + + /* BUG_ON(!buffer_dirty(bh)); */ + /* excluded by scan_dirty_data_buffers() */ + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + + err = nilfs_segctor_add_file_block(sci, bh, inode, + sizeof(struct nilfs_binfo_v)); + if (!err) + sci->sc_datablk_cnt++; + return err; +} + +static int nilfs_collect_file_node(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode) +{ + int err; + + /* BUG_ON(!buffer_dirty(bh)); */ + /* excluded by scan_dirty_node_buffers() */ + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + return 0; +} + +static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode) +{ + BUG_ON(!buffer_dirty(bh)); + return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); +} + +static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry( + sci, ssp, sizeof(*binfo_v)); + *binfo_v = binfo->bi_v; +} + +static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + __le64 *vblocknr = nilfs_segctor_map_segsum_entry( + sci, ssp, sizeof(*vblocknr)); + *vblocknr = binfo->bi_v.bi_vblocknr; +} + +struct nilfs_sc_operations nilfs_sc_file_ops = { + .collect_data = nilfs_collect_file_data, + .collect_node = nilfs_collect_file_node, + .collect_bmap = nilfs_collect_file_bmap, + .write_data_binfo = nilfs_write_file_data_binfo, + .write_node_binfo = nilfs_write_file_node_binfo, +}; + +static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + int err; + + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + + err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); + if (!err) + sci->sc_datablk_cnt++; + return err; +} + +static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + BUG_ON(!buffer_dirty(bh)); + return nilfs_segctor_add_file_block(sci, bh, inode, + sizeof(struct nilfs_binfo_dat)); +} + +static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp, + sizeof(*blkoff)); + *blkoff = binfo->bi_dat.bi_blkoff; +} + +static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + struct nilfs_binfo_dat *binfo_dat = + nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat)); + *binfo_dat = binfo->bi_dat; +} + +struct nilfs_sc_operations 
nilfs_sc_dat_ops = { + .collect_data = nilfs_collect_dat_data, + .collect_node = nilfs_collect_file_node, + .collect_bmap = nilfs_collect_dat_bmap, + .write_data_binfo = nilfs_write_dat_data_binfo, + .write_node_binfo = nilfs_write_dat_node_binfo, +}; + +struct nilfs_sc_operations nilfs_sc_dsync_ops = { + .collect_data = nilfs_collect_file_data, + .collect_node = NULL, + .collect_bmap = NULL, + .write_data_binfo = nilfs_write_file_data_binfo, + .write_node_binfo = NULL, +}; + +static int nilfs_lookup_dirty_data_buffers(struct inode *inode, + struct list_head *listp, + struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + unsigned i, ndirties = 0, nlimit; + pgoff_t index = 0; + int err = 0; + + nlimit = sci->sc_segbuf_nblocks - + (sci->sc_nblk_this_inc + segbuf->sb_sum.nblocks); + pagevec_init(&pvec, 0); + repeat: + if (!pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) + return 0; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct buffer_head *bh, *head; + struct page *page = pvec.pages[i]; + + if (mapping->host) { + lock_page(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, + 1 << inode->i_blkbits, 0); + unlock_page(page); + } + + bh = head = page_buffers(page); + do { + if (buffer_dirty(bh)) { + if (ndirties > nlimit) { + err = -E2BIG; + break; + } + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, listp); + ndirties++; + } + bh = bh->b_this_page; + } while (bh != head); + } + pagevec_release(&pvec); + cond_resched(); + + if (!err) + goto repeat; + return err; +} + +static void nilfs_lookup_dirty_node_buffers(struct inode *inode, + struct list_head *listp) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct address_space *mapping = &ii->i_btnode_cache; + struct pagevec pvec; + struct buffer_head *bh, *head; + unsigned int i; + pgoff_t index = 0; + + pagevec_init(&pvec, 0); + + while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + bh = head = page_buffers(pvec.pages[i]); + do { + if (buffer_dirty(bh)) { + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, + listp); + } + bh = bh->b_this_page; + } while (bh != head); + } + pagevec_release(&pvec); + cond_resched(); + } +} + +static void nilfs_dispose_list(struct nilfs_sb_info *sbi, + struct list_head *head, int force) +{ + struct nilfs_inode_info *ii, *n; + struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii; + unsigned nv = 0; + + while (!list_empty(head)) { + spin_lock(&sbi->s_inode_lock); + list_for_each_entry_safe(ii, n, head, i_dirty) { + list_del_init(&ii->i_dirty); + if (force) { + if (unlikely(ii->i_bh)) { + brelse(ii->i_bh); + ii->i_bh = NULL; + } + } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { + set_bit(NILFS_I_QUEUED, &ii->i_state); + list_add_tail(&ii->i_dirty, + &sbi->s_dirty_files); + continue; + } + ivec[nv++] = ii; + if (nv == SC_N_INODEVEC) + break; + } + spin_unlock(&sbi->s_inode_lock); + + for (pii = ivec; nv > 0; pii++, nv--) + iput(&(*pii)->vfs_inode); + } +} + +static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int ret = 0; + + if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) + ret++; + if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) + ret++; + if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) + ret++; + if (ret || nilfs_doing_gc()) + if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) + ret++; + return ret; +} + 
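+/*
+ * The helpers below decide whether the segment constructor has pending
+ * work: nilfs_segctor_clean() checks only the collector's own state
+ * (dirty-file list, the NILFS_SC_DIRTY flag, cleaning segments, and GC
+ * inodes), while nilfs_segctor_confirm() additionally fetches dirty
+ * state from the metadata files via nilfs_test_metadata_dirty() above
+ * and looks at the per-superblock dirty file list.
+ */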
+static int nilfs_segctor_clean(struct nilfs_sc_info *sci) +{ + return list_empty(&sci->sc_dirty_files) && + !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && + list_empty(&sci->sc_cleaning_segments) && + (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); +} + +static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + int ret = 0; + + if (nilfs_test_metadata_dirty(sbi)) + set_bit(NILFS_SC_DIRTY, &sci->sc_flags); + + spin_lock(&sbi->s_inode_lock); + if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) + ret++; + + spin_unlock(&sbi->s_inode_lock); + return ret; +} + +static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + + nilfs_mdt_clear_dirty(sbi->s_ifile); + nilfs_mdt_clear_dirty(nilfs->ns_cpfile); + nilfs_mdt_clear_dirty(nilfs->ns_sufile); + nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); +} + +static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) +{ + struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct buffer_head *bh_cp; + struct nilfs_checkpoint *raw_cp; + int err; + + /* XXX: this interface will be changed */ + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, + &raw_cp, &bh_cp); + if (likely(!err)) { + /* The following code is duplicated with cpfile. But, it is + needed to collect the checkpoint even if it was not newly + created */ + nilfs_mdt_mark_buffer_dirty(bh_cp); + nilfs_mdt_mark_dirty(nilfs->ns_cpfile); + nilfs_cpfile_put_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); + } else { + BUG_ON(err == -EINVAL || err == -ENOENT); + } + return err; +} + +static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct buffer_head *bh_cp; + struct nilfs_checkpoint *raw_cp; + int err; + + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, + &raw_cp, &bh_cp); + if (unlikely(err)) { + BUG_ON(err == -EINVAL || err == -ENOENT); + goto failed_ibh; + } + raw_cp->cp_snapshot_list.ssl_next = 0; + raw_cp->cp_snapshot_list.ssl_prev = 0; + raw_cp->cp_inodes_count = + cpu_to_le64(atomic_read(&sbi->s_inodes_count)); + raw_cp->cp_blocks_count = + cpu_to_le64(atomic_read(&sbi->s_blocks_count)); + raw_cp->cp_nblk_inc = + cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); + raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); + raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); + if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0) + nilfs_checkpoint_set_sketch(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); + nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); + return 0; + + failed_ibh: + return err; +} + +static void nilfs_fill_in_file_bmap(struct inode *ifile, + struct nilfs_inode_info *ii) + +{ + struct buffer_head *ibh; + struct nilfs_inode *raw_inode; + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) { + ibh = ii->i_bh; + BUG_ON(!ibh); + raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, + ibh); + nilfs_bmap_write(ii->i_bmap, raw_inode); + nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); + } +} + +static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, + struct inode *ifile) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { + nilfs_fill_in_file_bmap(ifile, ii); + set_bit(NILFS_I_COLLECTED, &ii->i_state); + } + if 
(sci->sc_sketch_inode) { + ii = NILFS_I(sci->sc_sketch_inode); + if (test_bit(NILFS_I_DIRTY, &ii->i_state)) + nilfs_fill_in_file_bmap(ifile, ii); + } +} + +/* + * CRC calculation routines + */ +static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) +{ + struct nilfs_super_root *raw_sr = + (struct nilfs_super_root *)bh_sr->b_data; + u32 crc; + + BUG_ON(NILFS_SR_BYTES > bh_sr->b_size); + crc = crc32_le(seed, + (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), + NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + raw_sr->sr_sum = cpu_to_le32(crc); +} + +static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci, + u32 seed) +{ + struct nilfs_segment_buffer *segbuf; + + if (sci->sc_super_root) + nilfs_fill_in_super_root_crc(sci->sc_super_root, seed); + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); + nilfs_segbuf_fill_in_data_crc(segbuf, seed); + } +} + +static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct buffer_head *bh_sr = sci->sc_super_root; + struct nilfs_super_root *raw_sr = + (struct nilfs_super_root *)bh_sr->b_data; + unsigned isz = nilfs->ns_inode_size; + + raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_nongc_ctime + = cpu_to_le64(nilfs_doing_gc() ? + nilfs->ns_nongc_ctime : sci->sc_seg_ctime); + raw_sr->sr_flags = 0; + + nilfs_mdt_write_inode_direct( + nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); + nilfs_mdt_write_inode_direct( + nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); + nilfs_mdt_write_inode_direct( + nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); +} + +static void nilfs_redirty_inodes(struct list_head *head) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, head, i_dirty) { + if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) + clear_bit(NILFS_I_COLLECTED, &ii->i_state); + } +} + +static void nilfs_drop_collected_inodes(struct list_head *head) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, head, i_dirty) { + if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) + continue; + + clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); + set_bit(NILFS_I_UPDATED, &ii->i_state); + } +} + +static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, + struct inode *sufile) + +{ + struct list_head *head = &sci->sc_cleaning_segments; + struct nilfs_segment_entry *ent; + int err; + + list_for_each_entry(ent, head, list) { + if (!(ent->flags & NILFS_SLH_FREED)) + break; + err = nilfs_sufile_cancel_free(sufile, ent->segnum); + BUG_ON(err); + + ent->flags &= ~NILFS_SLH_FREED; + } +} + +static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct list_head *head = &sci->sc_cleaning_segments; + struct nilfs_segment_entry *ent; + int err; + + list_for_each_entry(ent, head, list) { + err = nilfs_sufile_free(sufile, ent->segnum); + if (unlikely(err)) + return err; + ent->flags |= NILFS_SLH_FREED; + } + return 0; +} + +static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci) +{ + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); +} + +static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, + struct inode *inode, + struct list_head *listp, + int (*collect)(struct nilfs_sc_info *, + struct buffer_head *, + struct inode *)) +{ + struct buffer_head *bh, *n; + int err = 0; + + if (collect) { + list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) { + list_del_init(&bh->b_assoc_buffers); + err = 
collect(sci, bh, inode); + brelse(bh); + if (unlikely(err)) + goto dispose_buffers; + } + return 0; + } + + dispose_buffers: + while (!list_empty(listp)) { + bh = list_entry(listp->next, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); + } + return err; +} + +static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, + struct inode *inode, + struct nilfs_sc_operations *sc_ops) +{ + LIST_HEAD(data_buffers); + LIST_HEAD(node_buffers); + int err, err2; + + if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { + err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, + sci); + if (err) { + err2 = nilfs_segctor_apply_buffers( + sci, inode, &data_buffers, + err == -E2BIG ? sc_ops->collect_data : NULL); + if (err == -E2BIG) + err = err2; + goto break_or_fail; + } + } + nilfs_lookup_dirty_node_buffers(inode, &node_buffers); + + if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { + err = nilfs_segctor_apply_buffers( + sci, inode, &data_buffers, sc_ops->collect_data); + if (unlikely(err)) { + /* dispose node list */ + nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, NULL); + goto break_or_fail; + } + sci->sc_stage.flags |= NILFS_CF_NODE; + } + /* Collect node */ + err = nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, sc_ops->collect_node); + if (unlikely(err)) + goto break_or_fail; + + nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers); + err = nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, sc_ops->collect_bmap); + if (unlikely(err)) + goto break_or_fail; + + nilfs_segctor_end_finfo(sci, inode); + sci->sc_stage.flags &= ~NILFS_CF_NODE; + + break_or_fail: + return err; +} + +static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, + struct inode *inode) +{ + LIST_HEAD(data_buffers); + int err, err2; + + err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, sci); + err2 = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, + (!err || err == -E2BIG) ? 
+ nilfs_collect_file_data : NULL); + if (err == -E2BIG) + err = err2; + if (!err) + nilfs_segctor_end_finfo(sci, inode); + return err; +} + +static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct list_head *head; + struct nilfs_inode_info *ii; + int err = 0; + + switch (sci->sc_stage.scnt) { + case NILFS_ST_INIT: + /* Pre-processes */ + sci->sc_stage.flags = 0; + + if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) { + sci->sc_nblk_inc = 0; + sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; + if (mode == SC_LSEG_DSYNC) { + sci->sc_stage.scnt = NILFS_ST_DSYNC; + goto dsync_mode; + } + } + + sci->sc_stage.dirty_file_ptr = NULL; + sci->sc_stage.gc_inode_ptr = NULL; + if (mode == SC_FLUSH_DAT) { + sci->sc_stage.scnt = NILFS_ST_DAT; + goto dat_stage; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_GC: + if (nilfs_doing_gc()) { + head = &sci->sc_gc_inodes; + ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr, + head, i_dirty); + list_for_each_entry_continue(ii, head, i_dirty) { + err = nilfs_segctor_scan_file( + sci, &ii->vfs_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) { + sci->sc_stage.gc_inode_ptr = list_entry( + ii->i_dirty.prev, + struct nilfs_inode_info, + i_dirty); + goto break_or_fail; + } + set_bit(NILFS_I_COLLECTED, &ii->i_state); + } + sci->sc_stage.gc_inode_ptr = NULL; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_FILE: + head = &sci->sc_dirty_files; + ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, + i_dirty); + list_for_each_entry_continue(ii, head, i_dirty) { + clear_bit(NILFS_I_DIRTY, &ii->i_state); + + err = nilfs_segctor_scan_file(sci, &ii->vfs_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) { + sci->sc_stage.dirty_file_ptr = + list_entry(ii->i_dirty.prev, + struct nilfs_inode_info, + i_dirty); + goto break_or_fail; + } + /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */ + /* XXX: required ? 
*/ + } + sci->sc_stage.dirty_file_ptr = NULL; + if (mode == SC_FLUSH_FILE) { + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SKETCH: + if (mode == SC_LSEG_SR && sci->sc_sketch_inode) { + ii = NILFS_I(sci->sc_sketch_inode); + if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { + sci->sc_sketch_inode->i_ctime.tv_sec + = sci->sc_seg_ctime; + sci->sc_sketch_inode->i_mtime.tv_sec + = sci->sc_seg_ctime; + err = nilfs_mark_inode_dirty( + sci->sc_sketch_inode); + if (unlikely(err)) + goto break_or_fail; + } + err = nilfs_segctor_scan_file(sci, + sci->sc_sketch_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) + goto break_or_fail; + } + sci->sc_stage.scnt++; + sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; + /* Fall through */ + case NILFS_ST_IFILE: + err = nilfs_segctor_scan_file(sci, sbi->s_ifile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; + /* Creating a checkpoint */ + err = nilfs_segctor_create_checkpoint(sci); + if (unlikely(err)) + break; + /* Fall through */ + case NILFS_ST_CPFILE: + err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SUFILE: + err = nilfs_segctor_prepare_free_segments(sci, + nilfs->ns_sufile); + if (unlikely(err)) + break; + err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_DAT: + dat_stage: + err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), + &nilfs_sc_dat_ops); + if (unlikely(err)) + break; + if (mode == SC_FLUSH_DAT) { + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SR: + if (mode == SC_LSEG_SR) { + /* Appending a super root */ + err = nilfs_segctor_add_super_root(sci); + if (unlikely(err)) + break; + } + /* End of a logical segment */ + sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + case NILFS_ST_DSYNC: + dsync_mode: + sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; + ii = sci->sc_stage.dirty_file_ptr; + if (!test_bit(NILFS_I_BUSY, &ii->i_state)) + break; + + err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); + if (unlikely(err)) + break; + sci->sc_stage.dirty_file_ptr = NULL; + sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + case NILFS_ST_DONE: + return 0; + default: + BUG(); + } + + break_or_fail: + return err; +} + +static int nilfs_segctor_terminate_segment(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *segbuf, + struct inode *sufile) +{ + struct nilfs_segment_entry *ent = segbuf->sb_segent; + int err; + + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + return err; + nilfs_mdt_mark_buffer_dirty(ent->bh_su); + nilfs_mdt_mark_dirty(sufile); + nilfs_close_segment_entry(ent, sufile); + + list_add_tail(&ent->list, &sci->sc_active_segments); + segbuf->sb_segent = NULL; + return 0; +} + +static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) +{ + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + int err; + + err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); + if (unlikely(err)) + return err; + nilfs_mdt_mark_buffer_dirty(bh_su); + nilfs_mdt_mark_dirty(sufile); + nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); + return 0; +} + +static int nilfs_segctor_begin_construction(struct 
nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *n; + struct inode *sufile = nilfs->ns_sufile; + __u64 nextnum; + int err; + + if (list_empty(&sci->sc_segbufs)) { + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + return -ENOMEM; + list_add(&segbuf->sb_list, &sci->sc_segbufs); + } else + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + + err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, + nilfs->ns_pseg_offset, nilfs); + if (unlikely(err)) + return err; + + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + err = nilfs_segctor_terminate_segment(sci, segbuf, sufile); + if (unlikely(err)) + return err; + + nilfs_shift_to_next_segment(nilfs); + err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + } + sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; + + err = nilfs_touch_segusage(sufile, segbuf->sb_segnum); + if (unlikely(err)) + return err; + + if (nilfs->ns_segnum == nilfs->ns_nextnum) { + /* Start from the head of a new full segment */ + err = nilfs_sufile_alloc(sufile, &nextnum); + if (unlikely(err)) + return err; + } else + nextnum = nilfs->ns_nextnum; + + segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); + + /* truncating segment buffers */ + list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, + sb_list) { + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + return err; +} + +static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int nadd) +{ + struct nilfs_segment_buffer *segbuf, *prev, *n; + struct inode *sufile = nilfs->ns_sufile; + __u64 nextnextnum; + LIST_HEAD(list); + int err, ret, i; + + prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + /* + * Since the segment specified with nextnum might be allocated during + * the previous construction, the buffer including its segusage may + * not be dirty. The following call ensures that the buffer is dirty + * and will pin the buffer on memory until the sufile is written. 
+ */ + err = nilfs_touch_segusage(sufile, prev->sb_nextnum); + if (unlikely(err)) + return err; + + for (i = 0; i < nadd; i++) { + /* extend segment info */ + err = -ENOMEM; + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + goto failed; + + /* map this buffer to region of segment on-disk */ + err = nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); + if (unlikely(err)) + goto failed_segbuf; + + sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; + + /* allocate the next next full segment */ + err = nilfs_sufile_alloc(sufile, &nextnextnum); + if (unlikely(err)) + goto failed_segbuf; + + segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1; + nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs); + + list_add_tail(&segbuf->sb_list, &list); + prev = segbuf; + } + list_splice(&list, sci->sc_segbufs.prev); + return 0; + + failed_segbuf: + nilfs_segbuf_free(segbuf); + failed: + list_for_each_entry_safe(segbuf, n, &list, sb_list) { + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + BUG_ON(ret); + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + return err; +} + +static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf; + int ret, done = 0; + + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + if (nilfs->ns_nextnum != segbuf->sb_nextnum) { + ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + BUG_ON(ret); + } + if (segbuf->sb_io_error) { + /* Case 1: The first segment failed */ + if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) + /* Case 1a: Partial segment appended into an existing + segment */ + nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start, + segbuf->sb_fseg_end); + else /* Case 1b: New full segment */ + set_nilfs_discontinued(nilfs); + done++; + } + + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + BUG_ON(ret); + if (!done && segbuf->sb_io_error) { + if (segbuf->sb_segnum != nilfs->ns_nextnum) + /* Case 2: extended segment (!= next) failed */ + nilfs_sufile_set_error(nilfs->ns_sufile, + segbuf->sb_segnum); + done++; + } + } +} + +static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) + nilfs_segbuf_clear(segbuf); + sci->sc_super_root = NULL; +} + +static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + + while (!list_empty(&sci->sc_segbufs)) { + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + /* sci->sc_curseg = NULL; */ +} + +static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int err) +{ + if (unlikely(err)) { + nilfs_segctor_free_incomplete_segments(sci, nilfs); + nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + } + nilfs_segctor_clear_segment_buffers(sci); +} + +static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf; + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + unsigned long live_blocks; + int ret; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed because bh_su is dirty */ + live_blocks = segbuf->sb_sum.nblocks + + 
(segbuf->sb_pseg_start - segbuf->sb_fseg_start); + raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); + raw_su->su_nblocks = cpu_to_le32(live_blocks); + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, + bh_su); + } +} + +static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf; + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + int ret; + + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed because bh_su is dirty */ + raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - + segbuf->sb_fseg_start); + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); + + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed */ + raw_su->su_nblocks = 0; + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, + bh_su); + } +} + +static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *last, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf = last, *n; + int ret; + + list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, + sb_list) { + list_del_init(&segbuf->sb_list); + sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + BUG_ON(ret); + nilfs_segbuf_free(segbuf); + } +} + + +static int nilfs_segctor_collect(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int mode) +{ + struct nilfs_cstage prev_stage = sci->sc_stage; + int err, nadd = 1; + + /* Collection retry loop */ + for (;;) { + sci->sc_super_root = NULL; + sci->sc_nblk_this_inc = 0; + sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + + err = nilfs_segctor_reset_segment_buffer(sci); + if (unlikely(err)) + goto failed; + + err = nilfs_segctor_collect_blocks(sci, mode); + sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; + if (!err) + break; + + if (unlikely(err != -E2BIG)) + goto failed; + + /* The current segment is filled up */ + if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) + break; + + nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + nilfs_segctor_clear_segment_buffers(sci); + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); + sci->sc_stage = prev_stage; + } + nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); + return 0; + + failed: + return err; +} + +static void nilfs_list_replace_buffer(struct buffer_head *old_bh, + struct buffer_head *new_bh) +{ + BUG_ON(!list_empty(&new_bh->b_assoc_buffers)); + + list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers); + /* The caller must release old_bh */ +} + +static int +nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *segbuf, + int mode) +{ + struct inode *inode = NULL; + sector_t blocknr; + unsigned long nfinfo = segbuf->sb_sum.nfinfo; + unsigned long nblocks = 0, ndatablk = 0; + struct nilfs_sc_operations *sc_op = NULL; + struct nilfs_segsum_pointer ssp; + struct nilfs_finfo *finfo = NULL; + union nilfs_binfo binfo; + struct buffer_head *bh, *bh_org; + ino_t ino = 0; + int err = 0; + + if (!nfinfo) + goto out; + + blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk; + ssp.bh = 
NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); + ssp.offset = sizeof(struct nilfs_segment_summary); + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + if (bh == sci->sc_super_root) + break; + if (!finfo) { + finfo = nilfs_segctor_map_segsum_entry( + sci, &ssp, sizeof(*finfo)); + ino = le64_to_cpu(finfo->fi_ino); + nblocks = le32_to_cpu(finfo->fi_nblocks); + ndatablk = le32_to_cpu(finfo->fi_ndatablk); + + if (buffer_nilfs_node(bh)) + inode = NILFS_BTNC_I(bh->b_page->mapping); + else + inode = NILFS_AS_I(bh->b_page->mapping); + + if (mode == SC_LSEG_DSYNC) + sc_op = &nilfs_sc_dsync_ops; + else if (ino == NILFS_DAT_INO) + sc_op = &nilfs_sc_dat_ops; + else /* file blocks */ + sc_op = &nilfs_sc_file_ops; + } + bh_org = bh; + get_bh(bh_org); + err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr, + &binfo); + if (bh != bh_org) + nilfs_list_replace_buffer(bh_org, bh); + brelse(bh_org); + if (unlikely(err)) + goto failed_bmap; + + if (ndatablk > 0) + sc_op->write_data_binfo(sci, &ssp, &binfo); + else + sc_op->write_node_binfo(sci, &ssp, &binfo); + + blocknr++; + if (--nblocks == 0) { + finfo = NULL; + if (--nfinfo == 0) + break; + } else if (ndatablk > 0) + ndatablk--; + } + out: + return 0; + + failed_bmap: + err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); + return err; +} + +static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_segment_buffer *segbuf; + int err; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); + if (unlikely(err)) + return err; + nilfs_segbuf_fill_in_segsum(segbuf); + } + return 0; +} + +static int +nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) +{ + struct page *clone_page; + struct buffer_head *bh, *head, *bh2; + void *kaddr; + + bh = head = page_buffers(page); + + clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); + if (unlikely(!clone_page)) + return -ENOMEM; + + bh2 = page_buffers(clone_page); + kaddr = kmap_atomic(page, KM_USER0); + do { + if (list_empty(&bh->b_assoc_buffers)) + continue; + get_bh(bh2); + page_cache_get(clone_page); /* for each bh */ + memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); + bh2->b_blocknr = bh->b_blocknr; + list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); + list_add_tail(&bh->b_assoc_buffers, out); + } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); + kunmap_atomic(kaddr, KM_USER0); + + if (!TestSetPageWriteback(clone_page)) + inc_zone_page_state(clone_page, NR_WRITEBACK); + unlock_page(clone_page); + + return 0; +} + +static int nilfs_test_page_to_be_frozen(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) + return 0; + + if (page_mapped(page)) { + ClearPageChecked(page); + return 1; + } + return PageChecked(page); +} + +static int nilfs_begin_page_io(struct page *page, struct list_head *out) +{ + if (!page || PageWriteback(page)) + /* For split b-tree node pages, this function may be called + twice. We ignore the 2nd or later calls by this check. 
*/ + return 0; + + lock_page(page); + clear_page_dirty_for_io(page); + set_page_writeback(page); + unlock_page(page); + + if (nilfs_test_page_to_be_frozen(page)) { + int err = nilfs_copy_replace_page_buffers(page, out); + if (unlikely(err)) + return err; + } + return 0; +} + +static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, + struct page **failed_page) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + struct list_head *list = &sci->sc_copied_buffers; + int err; + + *failed_page = NULL; + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + if (bh->b_page != bd_page) { + if (bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + } + bd_page = bh->b_page; + } + } + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + err = nilfs_begin_page_io(fs_page, list); + if (unlikely(err)) { + *failed_page = fs_page; + goto out; + } + fs_page = bh->b_page; + } + } + } + if (bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + } + err = nilfs_begin_page_io(fs_page, list); + if (unlikely(err)) + *failed_page = fs_page; + out: + return err; +} + +static int nilfs_segctor_write(struct nilfs_sc_info *sci, + struct backing_dev_info *bdi) +{ + struct nilfs_segment_buffer *segbuf; + struct nilfs_write_info wi; + int err, res; + + wi.sb = sci->sc_super; + wi.bh_sr = sci->sc_super_root; + wi.bdi = bdi; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + nilfs_segbuf_prepare_write(segbuf, &wi); + err = nilfs_segbuf_write(segbuf, &wi); + + res = nilfs_segbuf_wait(segbuf, &wi); + err = unlikely(err) ? : res; + if (unlikely(err)) + return err; + } + return 0; +} + +static int nilfs_page_has_uncleared_buffer(struct page *page) +{ + struct buffer_head *head, *bh; + + head = bh = page_buffers(page); + do { + if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) + return 1; + bh = bh->b_this_page; + } while (bh != head); + return 0; +} + +static void __nilfs_end_page_io(struct page *page, int err) +{ + /* BUG_ON(err > 0); */ + if (!err) { + if (!nilfs_page_buffers_clean(page)) + __set_page_dirty_nobuffers(page); + ClearPageError(page); + } else { + __set_page_dirty_nobuffers(page); + SetPageError(page); + } + + if (buffer_nilfs_allocated(page_buffers(page))) { + if (TestClearPageWriteback(page)) + dec_zone_page_state(page, NR_WRITEBACK); + } else + end_page_writeback(page); +} + +static void nilfs_end_page_io(struct page *page, int err) +{ + if (!page) + return; + + if (buffer_nilfs_node(page_buffers(page)) && + nilfs_page_has_uncleared_buffer(page)) + /* For b-tree node pages, this function may be called twice + or more because they might be split in a segment. + This check assures that cleanup has been done for all + buffers in a split btnode page. 
*/ + return; + + __nilfs_end_page_io(page, err); +} + +static void nilfs_clear_copied_buffers(struct list_head *list, int err) +{ + struct buffer_head *bh, *head; + struct page *page; + + while (!list_empty(list)) { + bh = list_entry(list->next, struct buffer_head, + b_assoc_buffers); + page = bh->b_page; + page_cache_get(page); + head = bh = page_buffers(page); + do { + if (!list_empty(&bh->b_assoc_buffers)) { + list_del_init(&bh->b_assoc_buffers); + if (!err) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + } + brelse(bh); /* for b_assoc_buffers */ + } + } while ((bh = bh->b_this_page) != head); + + __nilfs_end_page_io(page, err); + page_cache_release(page); + } +} + +static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, + struct page *failed_page, int err) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + if (bh->b_page != bd_page) { + if (bd_page) + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + } + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + nilfs_end_page_io(fs_page, err); + if (unlikely(fs_page == failed_page)) + goto done; + fs_page = bh->b_page; + } + } + } + if (bd_page) + end_page_writeback(bd_page); + + nilfs_end_page_io(fs_page, err); + done: + nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); +} + +static void nilfs_set_next_segment(struct the_nilfs *nilfs, + struct nilfs_segment_buffer *segbuf) +{ + nilfs->ns_segnum = segbuf->sb_segnum; + nilfs->ns_nextnum = segbuf->sb_nextnum; + nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start + + segbuf->sb_sum.nblocks; + nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq; + nilfs->ns_ctime = segbuf->sb_sum.ctime; +} + +static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + int update_sr = (sci->sc_super_root != NULL); + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + if (bh->b_page != bd_page) { + if (bd_page) + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + } + /* + * We assume that the buffers which belong to the same page + * continue over the buffer list. + * Under this assumption, the last BHs of pages is + * identifiable by the discontinuity of bh->b_page + * (page != fs_page). + * + * For B-tree node blocks, however, this assumption is not + * guaranteed. The cleanup code of B-tree node pages needs + * special care. 
+ */ + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + nilfs_end_page_io(fs_page, 0); + fs_page = bh->b_page; + } + } + + if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { + if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { + set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); + sci->sc_lseg_stime = jiffies; + } + if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) + clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); + } + } + /* + * Since pages may continue over multiple segment buffers, + * end of the last page must be checked outside of the loop. + */ + if (bd_page) + end_page_writeback(bd_page); + + nilfs_end_page_io(fs_page, 0); + + nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); + + nilfs_drop_collected_inodes(&sci->sc_dirty_files); + + if (nilfs_doing_gc()) { + nilfs_drop_collected_inodes(&sci->sc_gc_inodes); + if (update_sr) + nilfs_commit_gcdat_inode(nilfs); + } else { + nilfs->ns_nongc_ctime = sci->sc_seg_ctime; + set_nilfs_cond_nongc_write(nilfs); + wake_up(&nilfs->ns_cleanerd_wq); + } + + sci->sc_nblk_inc += sci->sc_nblk_this_inc; + + segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_set_next_segment(nilfs, segbuf); + + if (update_sr) { + nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, + segbuf->sb_sum.seg_seq, nilfs->ns_cno); + + clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); + set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); + } else + clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); +} + +static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, + struct nilfs_sb_info *sbi) +{ + struct nilfs_inode_info *ii, *n; + __u64 cno = sbi->s_nilfs->ns_cno; + + spin_lock(&sbi->s_inode_lock); + retry: + list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { + if (!ii->i_bh) { + struct buffer_head *ibh; + int err; + + spin_unlock(&sbi->s_inode_lock); + err = nilfs_ifile_get_inode_block( + sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); + if (unlikely(err)) { + nilfs_warning(sbi->s_super, __func__, + "failed to get inode block.\n"); + return err; + } + nilfs_mdt_mark_buffer_dirty(ibh); + nilfs_mdt_mark_dirty(sbi->s_ifile); + spin_lock(&sbi->s_inode_lock); + if (likely(!ii->i_bh)) + ii->i_bh = ibh; + else + brelse(ibh); + goto retry; + } + ii->i_cno = cno; + + clear_bit(NILFS_I_QUEUED, &ii->i_state); + set_bit(NILFS_I_BUSY, &ii->i_state); + list_del(&ii->i_dirty); + list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); + } + spin_unlock(&sbi->s_inode_lock); + + NILFS_I(sbi->s_ifile)->i_cno = cno; + + return 0; +} + +static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, + struct nilfs_sb_info *sbi) +{ + struct nilfs_transaction_info *ti = current->journal_info; + struct nilfs_inode_info *ii, *n; + __u64 cno = sbi->s_nilfs->ns_cno; + + spin_lock(&sbi->s_inode_lock); + list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { + if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || + test_bit(NILFS_I_DIRTY, &ii->i_state)) { + /* The current checkpoint number (=nilfs->ns_cno) is + changed between check-in and check-out only if the + super root is written out. So, we can update i_cno + for the inodes that remain in the dirty list. 
*/ + ii->i_cno = cno; + continue; + } + clear_bit(NILFS_I_BUSY, &ii->i_state); + brelse(ii->i_bh); + ii->i_bh = NULL; + list_del(&ii->i_dirty); + list_add_tail(&ii->i_dirty, &ti->ti_garbage); + } + spin_unlock(&sbi->s_inode_lock); +} + +/* + * Nasty routines to manipulate active flags on sufile. + * These would be removed in a future release. + */ +static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent, *n; + struct inode *sufile = nilfs->ns_sufile; + struct list_head *head; + + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + nilfs_segment_usage_set_active(ent->raw_su); + nilfs_close_segment_entry(ent, sufile); + } + + head = &sci->sc_active_segments; + list_for_each_entry_safe(ent, n, head, list) { + nilfs_segment_usage_set_active(ent->raw_su); + nilfs_close_segment_entry(ent, sufile); + } + + down_write(&nilfs->ns_sem); + head = &nilfs->ns_used_segments; + list_for_each_entry(ent, head, list) { + nilfs_segment_usage_set_volatile_active(ent->raw_su); + } + up_write(&nilfs->ns_sem); +} + +static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent; + struct inode *sufile = nilfs->ns_sufile; + struct list_head *head; + int err; + + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + /* + * Deactivate ongoing full segments. The last segment is kept + * active because it is a start point of recovery, and is not + * relocatable until the super block points to a newer + * checkpoint. 
+ */ + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + nilfs_segment_usage_clear_active(ent->raw_su); + BUG_ON(!buffer_dirty(ent->bh_su)); + } + + head = &sci->sc_active_segments; + list_for_each_entry(ent, head, list) { + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + nilfs_segment_usage_clear_active(ent->raw_su); + BUG_ON(!buffer_dirty(ent->bh_su)); + } + + down_write(&nilfs->ns_sem); + head = &nilfs->ns_used_segments; + list_for_each_entry(ent, head, list) { + /* clear volatile active for segments of older generations */ + nilfs_segment_usage_clear_volatile_active(ent->raw_su); + } + up_write(&nilfs->ns_sem); + return 0; + + failed: + nilfs_segctor_reactivate_segments(sci, nilfs); + return err; +} + +static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent; + + /* move each segbuf->sb_segent to the list of used active segments */ + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + list_add_tail(&ent->list, &sci->sc_active_segments); + segbuf->sb_segent = NULL; + } +} + +static void +__nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) + +{ + struct nilfs_segment_entry *ent; + + list_splice_init(&sci->sc_active_segments, + nilfs->ns_used_segments.prev); + + list_for_each_entry(ent, &nilfs->ns_used_segments, list) { + nilfs_segment_usage_set_volatile_active(ent->raw_su); + /* These segments are kept open */ + } +} + +/* + * Main procedure of segment constructor + */ +static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct page *failed_page; + int err, has_sr = 0; + + sci->sc_stage.scnt = NILFS_ST_INIT; + + err = nilfs_segctor_check_in_files(sci, sbi); + if (unlikely(err)) + goto out; + + if (nilfs_test_metadata_dirty(sbi)) + set_bit(NILFS_SC_DIRTY, &sci->sc_flags); + + if (nilfs_segctor_clean(sci)) + goto out; + + do { + sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; + + err = nilfs_segctor_begin_construction(sci, nilfs); + if (unlikely(err)) + goto out; + + /* Update time stamp */ + sci->sc_seg_ctime = get_seconds(); + + err = nilfs_segctor_collect(sci, nilfs, mode); + if (unlikely(err)) + goto failed; + + has_sr = (sci->sc_super_root != NULL); + + /* Avoid empty segment */ + if (sci->sc_stage.scnt == NILFS_ST_DONE && + NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { + BUG_ON(mode == SC_LSEG_SR); + nilfs_segctor_end_construction(sci, nilfs, 1); + goto out; + } + + err = nilfs_segctor_assign(sci, mode); + if (unlikely(err)) + goto failed; + + if (has_sr) { + err = nilfs_segctor_deactivate_segments(sci, nilfs); + if (unlikely(err)) + goto failed; + } + if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) + nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); + + if (has_sr) { + err = nilfs_segctor_fill_in_checkpoint(sci); + if (unlikely(err)) + goto failed_to_make_up; + + nilfs_segctor_fill_in_super_root(sci, nilfs); + } + nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); + + /* Write partial segments */ + err = nilfs_segctor_prepare_write(sci, &failed_page); + if (unlikely(err)) + goto failed_to_write; + + 
nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); + + err = nilfs_segctor_write(sci, nilfs->ns_bdi); + if (unlikely(err)) + goto failed_to_write; + + nilfs_segctor_complete_write(sci); + + /* Commit segments */ + nilfs_segctor_bead_completed_segments(sci); + if (has_sr) { + down_write(&nilfs->ns_sem); + nilfs_update_last_segment(sbi, 1); + __nilfs_segctor_commit_deactivate_segments(sci, nilfs); + up_write(&nilfs->ns_sem); + nilfs_segctor_commit_free_segments(sci); + nilfs_segctor_clear_metadata_dirty(sci); + } + + nilfs_segctor_end_construction(sci, nilfs, 0); + + } while (sci->sc_stage.scnt != NILFS_ST_DONE); + + /* Clearing sketch data */ + if (has_sr && sci->sc_sketch_inode) { + if (i_size_read(sci->sc_sketch_inode) == 0) + clear_bit(NILFS_I_DIRTY, + &NILFS_I(sci->sc_sketch_inode)->i_state); + i_size_write(sci->sc_sketch_inode, 0); + } + out: + nilfs_segctor_destroy_segment_buffers(sci); + nilfs_segctor_check_out_files(sci, sbi); + return err; + + failed_to_write: + nilfs_segctor_abort_write(sci, failed_page, err); + nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); + + failed_to_make_up: + if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) + nilfs_redirty_inodes(&sci->sc_dirty_files); + if (has_sr) + nilfs_segctor_reactivate_segments(sci, nilfs); + + failed: + if (nilfs_doing_gc()) + nilfs_redirty_inodes(&sci->sc_gc_inodes); + nilfs_segctor_end_construction(sci, nilfs, err); + goto out; +} + +/** + * nilfs_secgtor_start_timer - set timer of background write + * @sci: nilfs_sc_info + * + * If the timer has already been set, it ignores the new request. + * This function MUST be called within a section locking the segment + * semaphore. + */ +static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) +{ + spin_lock(&sci->sc_state_lock); + if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { + sci->sc_timer->expires = jiffies + sci->sc_interval; + add_timer(sci->sc_timer); + sci->sc_state |= NILFS_SEGCTOR_COMMIT; + } + spin_unlock(&sci->sc_state_lock); +} + +static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) +{ + spin_lock(&sci->sc_state_lock); + if (!(sci->sc_flush_request & (1 << bn))) { + unsigned long prev_req = sci->sc_flush_request; + + sci->sc_flush_request |= (1 << bn); + if (!prev_req) + wake_up(&sci->sc_wait_daemon); + } + spin_unlock(&sci->sc_state_lock); +} + +/** + * nilfs_flush_segment - trigger a segment construction for resource control + * @sb: super block + * @ino: inode number of the file to be flushed out. + */ +void nilfs_flush_segment(struct super_block *sb, ino_t ino) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + + if (!sci || nilfs_doing_construction()) + return; + nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? 
ino : 0); + /* assign bit 0 to data files */ +} + +int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, + __u64 *segnum, size_t nsegs) +{ + struct nilfs_segment_entry *ent; + struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct inode *sufile = nilfs->ns_sufile; + LIST_HEAD(list); + __u64 *pnum; + const char *flag_name; + size_t i; + int err, err2 = 0; + + for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { + ent = nilfs_alloc_segment_entry(*pnum); + if (unlikely(!ent)) { + err = -ENOMEM; + goto failed; + } + list_add_tail(&ent->list, &list); + + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + + if (unlikely(le32_to_cpu(ent->raw_su->su_flags) != + (1UL << NILFS_SEGMENT_USAGE_DIRTY))) { + if (nilfs_segment_usage_clean(ent->raw_su)) + flag_name = "clean"; + else if (nilfs_segment_usage_active(ent->raw_su)) + flag_name = "active"; + else if (nilfs_segment_usage_volatile_active( + ent->raw_su)) + flag_name = "volatile active"; + else if (!nilfs_segment_usage_dirty(ent->raw_su)) + flag_name = "non-dirty"; + else + flag_name = "erroneous"; + + printk(KERN_ERR + "NILFS: %s segment is requested to be cleaned " + "(segnum=%llu)\n", + flag_name, (unsigned long long)ent->segnum); + err2 = -EINVAL; + } + nilfs_close_segment_entry(ent, sufile); + } + if (unlikely(err2)) { + err = err2; + goto failed; + } + list_splice(&list, sci->sc_cleaning_segments.prev); + return 0; + + failed: + nilfs_dispose_segment_list(&list); + return err; +} + +void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci) +{ + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); +} + +struct nilfs_segctor_wait_request { + wait_queue_t wq; + __u32 seq; + int err; + atomic_t done; +}; + +static int nilfs_segctor_sync(struct nilfs_sc_info *sci) +{ + struct nilfs_segctor_wait_request wait_req; + int err = 0; + + spin_lock(&sci->sc_state_lock); + init_wait(&wait_req.wq); + wait_req.err = 0; + atomic_set(&wait_req.done, 0); + wait_req.seq = ++sci->sc_seq_request; + spin_unlock(&sci->sc_state_lock); + + init_waitqueue_entry(&wait_req.wq, current); + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); + set_current_state(TASK_INTERRUPTIBLE); + wake_up(&sci->sc_wait_daemon); + + for (;;) { + if (atomic_read(&wait_req.done)) { + err = wait_req.err; + break; + } + if (!signal_pending(current)) { + schedule(); + continue; + } + err = -ERESTARTSYS; + break; + } + finish_wait(&sci->sc_wait_request, &wait_req.wq); + return err; +} + +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +{ + struct nilfs_segctor_wait_request *wrq, *n; + unsigned long flags; + + spin_lock_irqsave(&sci->sc_wait_request.lock, flags); + list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, + wq.task_list) { + if (!atomic_read(&wrq->done) && + nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + wrq->err = err; + atomic_set(&wrq->done, 1); + } + if (atomic_read(&wrq->done)) { + wrq->wq.func(&wrq->wq, + TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, + 0, NULL); + } + } + spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags); +} + +/** + * nilfs_construct_segment - construct a logical segment + * @sb: super block + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. 
+ */ +int nilfs_construct_segment(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct nilfs_transaction_info *ti; + int err; + + if (!sci) + return -EROFS; + + /* A call inside transactions causes a deadlock. */ + BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); + + err = nilfs_segctor_sync(sci); + return err; +} + +/** + * nilfs_construct_dsync_segment - construct a data-only logical segment + * @sb: super block + * @inode: the inode whose data blocks should be written out + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_construct_dsync_segment(struct super_block *sb, + struct inode *inode) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct nilfs_inode_info *ii; + struct nilfs_transaction_info ti; + int err = 0; + + if (!sci) + return -EROFS; + + nilfs_transaction_lock(sbi, &ti, 0); + + ii = NILFS_I(inode); + if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || + nilfs_test_opt(sbi, STRICT_ORDER) || + test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || + nilfs_discontinued(sbi->s_nilfs)) { + nilfs_transaction_unlock(sbi); + err = nilfs_segctor_sync(sci); + return err; + } + + spin_lock(&sbi->s_inode_lock); + if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && + !test_bit(NILFS_I_BUSY, &ii->i_state)) { + spin_unlock(&sbi->s_inode_lock); + nilfs_transaction_unlock(sbi); + return 0; + } + spin_unlock(&sbi->s_inode_lock); + sci->sc_stage.dirty_file_ptr = ii; + + err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); + + nilfs_transaction_unlock(sbi); + return err; +} + +struct nilfs_segctor_req { + int mode; + __u32 seq_accepted; + int sc_err; /* construction failure */ + int sb_err; /* super block writeback failure */ +}; + +#define FLUSH_FILE_BIT (0x1) /* data file only */ +#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ + +static void nilfs_segctor_accept(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + BUG_ON(!sci); + + req->sc_err = req->sb_err = 0; + spin_lock(&sci->sc_state_lock); + req->seq_accepted = sci->sc_seq_request; + spin_unlock(&sci->sc_state_lock); + + if (sci->sc_timer) + del_timer_sync(sci->sc_timer); +} + +static void nilfs_segctor_notify(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + /* Clear requests (even when the construction failed) */ + spin_lock(&sci->sc_state_lock); + + sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; + + if (req->mode == SC_LSEG_SR) { + sci->sc_seq_done = req->seq_accepted; + nilfs_segctor_wakeup(sci, req->sc_err ? 
: req->sb_err); + sci->sc_flush_request = 0; + } else if (req->mode == SC_FLUSH_FILE) + sci->sc_flush_request &= ~FLUSH_FILE_BIT; + else if (req->mode == SC_FLUSH_DAT) + sci->sc_flush_request &= ~FLUSH_DAT_BIT; + + spin_unlock(&sci->sc_state_lock); +} + +static int nilfs_segctor_construct(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + int err = 0; + + if (nilfs_discontinued(nilfs)) + req->mode = SC_LSEG_SR; + if (!nilfs_segctor_confirm(sci)) { + err = nilfs_segctor_do_construct(sci, req->mode); + req->sc_err = err; + } + if (likely(!err)) { + if (req->mode != SC_FLUSH_DAT) + atomic_set(&nilfs->ns_ndirtyblks, 0); + if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && + nilfs_discontinued(nilfs)) { + down_write(&nilfs->ns_sem); + req->sb_err = nilfs_commit_super(sbi); + up_write(&nilfs->ns_sem); + } + } + return err; +} + +static void nilfs_construction_timeout(unsigned long data) +{ + struct task_struct *p = (struct task_struct *)data; + wake_up_process(p); +} + +static void +nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) +{ + struct nilfs_inode_info *ii, *n; + + list_for_each_entry_safe(ii, n, head, i_dirty) { + if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) + continue; + hlist_del_init(&ii->vfs_inode.i_hash); + list_del_init(&ii->i_dirty); + nilfs_clear_gcinode(&ii->vfs_inode); + } +} + +int nilfs_clean_segments(struct super_block *sb, void __user *argp) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; + int err; + + if (unlikely(!sci)) + return -EROFS; + + nilfs_transaction_lock(sbi, &ti, 1); + + err = nilfs_init_gcdat_inode(nilfs); + if (unlikely(err)) + goto out_unlock; + err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + if (unlikely(err)) + goto out_unlock; + + list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); + + for (;;) { + nilfs_segctor_accept(sci, &req); + err = nilfs_segctor_construct(sci, &req); + nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); + nilfs_segctor_notify(sci, &req); + + if (likely(!err)) + break; + + nilfs_warning(sb, __func__, + "segment construction failed. (err=%d)", err); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(sci->sc_interval); + } + + out_unlock: + nilfs_clear_gcdat_inode(nilfs); + nilfs_transaction_unlock(sbi); + return err; +} + +static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = mode }; + + nilfs_transaction_lock(sbi, &ti, 0); + + nilfs_segctor_accept(sci, &req); + nilfs_segctor_construct(sci, &req); + nilfs_segctor_notify(sci, &req); + + /* + * Unclosed segment should be retried. We do this using sc_timer. + * Timeout of sc_timer will invoke complete construction which leads + * to close the current logical segment. + */ + if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) + nilfs_segctor_start_timer(sci); + + nilfs_transaction_unlock(sbi); +} + +static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) +{ + int mode = 0; + int err; + + spin_lock(&sci->sc_state_lock); + mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? 
+ SC_FLUSH_DAT : SC_FLUSH_FILE; + spin_unlock(&sci->sc_state_lock); + + if (mode) { + err = nilfs_segctor_do_construct(sci, mode); + + spin_lock(&sci->sc_state_lock); + sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? + ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; + spin_unlock(&sci->sc_state_lock); + } + clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); +} + +static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) +{ + if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || + time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { + if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) + return SC_FLUSH_FILE; + else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) + return SC_FLUSH_DAT; + } + return SC_LSEG_SR; +} + +/** + * nilfs_segctor_thread - main loop of the segment constructor thread. + * @arg: pointer to a struct nilfs_sc_info. + * + * nilfs_segctor_thread() initializes a timer and serves as a daemon + * to execute segment constructions. + */ +static int nilfs_segctor_thread(void *arg) +{ + struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; + struct timer_list timer; + int timeout = 0; + + init_timer(&timer); + timer.data = (unsigned long)current; + timer.function = nilfs_construction_timeout; + sci->sc_timer = &timer; + + /* start sync. */ + sci->sc_task = current; + wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ + printk(KERN_INFO + "segctord starting. Construction interval = %lu seconds, " + "CP frequency < %lu seconds\n", + sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); + + spin_lock(&sci->sc_state_lock); + loop: + for (;;) { + int mode; + + if (sci->sc_state & NILFS_SEGCTOR_QUIT) + goto end_thread; + + if (timeout || sci->sc_seq_request != sci->sc_seq_done) + mode = SC_LSEG_SR; + else if (!sci->sc_flush_request) + break; + else + mode = nilfs_segctor_flush_mode(sci); + + spin_unlock(&sci->sc_state_lock); + nilfs_segctor_thread_construct(sci, mode); + spin_lock(&sci->sc_state_lock); + timeout = 0; + } + + + if (freezing(current)) { + spin_unlock(&sci->sc_state_lock); + refrigerator(); + spin_lock(&sci->sc_state_lock); + } else { + DEFINE_WAIT(wait); + int should_sleep = 1; + + prepare_to_wait(&sci->sc_wait_daemon, &wait, + TASK_INTERRUPTIBLE); + + if (sci->sc_seq_request != sci->sc_seq_done) + should_sleep = 0; + else if (sci->sc_flush_request) + should_sleep = 0; + else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) + should_sleep = time_before(jiffies, + sci->sc_timer->expires); + + if (should_sleep) { + spin_unlock(&sci->sc_state_lock); + schedule(); + spin_lock(&sci->sc_state_lock); + } + finish_wait(&sci->sc_wait_daemon, &wait); + timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer->expires)); + } + goto loop; + + end_thread: + spin_unlock(&sci->sc_state_lock); + del_timer_sync(sci->sc_timer); + sci->sc_timer = NULL; + + /* end sync. 
*/ + sci->sc_task = NULL; + wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + return 0; +} + +static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) +{ + struct task_struct *t; + + t = kthread_run(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + int err = PTR_ERR(t); + + printk(KERN_ERR "NILFS: error %d creating segctord thread\n", + err); + return err; + } + wait_event(sci->sc_wait_task, sci->sc_task != NULL); + return 0; +} + +static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) +{ + sci->sc_state |= NILFS_SEGCTOR_QUIT; + + while (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + spin_unlock(&sci->sc_state_lock); + wait_event(sci->sc_wait_task, sci->sc_task == NULL); + spin_lock(&sci->sc_state_lock); + } +} + +static int nilfs_segctor_init(struct nilfs_sc_info *sci, + struct nilfs_recovery_info *ri) +{ + int err; + struct inode *inode = nilfs_iget(sci->sc_super, NILFS_SKETCH_INO); + + sci->sc_sketch_inode = IS_ERR(inode) ? NULL : inode; + if (sci->sc_sketch_inode) + i_size_write(sci->sc_sketch_inode, 0); + + sci->sc_seq_done = sci->sc_seq_request; + if (ri) + list_splice_init(&ri->ri_used_segments, + sci->sc_active_segments.prev); + + err = nilfs_segctor_start_thread(sci); + if (err) { + if (ri) + list_splice_init(&sci->sc_active_segments, + ri->ri_used_segments.prev); + if (sci->sc_sketch_inode) { + iput(sci->sc_sketch_inode); + sci->sc_sketch_inode = NULL; + } + } + return err; +} + +/* + * Setup & clean-up functions + */ +static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) +{ + struct nilfs_sc_info *sci; + + sci = kzalloc(sizeof(*sci), GFP_KERNEL); + if (!sci) + return NULL; + + sci->sc_sbi = sbi; + sci->sc_super = sbi->s_super; + + init_waitqueue_head(&sci->sc_wait_request); + init_waitqueue_head(&sci->sc_wait_daemon); + init_waitqueue_head(&sci->sc_wait_task); + spin_lock_init(&sci->sc_state_lock); + INIT_LIST_HEAD(&sci->sc_dirty_files); + INIT_LIST_HEAD(&sci->sc_segbufs); + INIT_LIST_HEAD(&sci->sc_gc_inodes); + INIT_LIST_HEAD(&sci->sc_active_segments); + INIT_LIST_HEAD(&sci->sc_cleaning_segments); + INIT_LIST_HEAD(&sci->sc_copied_buffers); + + sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; + sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; + sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; + + if (sbi->s_interval) + sci->sc_interval = sbi->s_interval; + if (sbi->s_watermark) + sci->sc_watermark = sbi->s_watermark; + return sci; +} + +static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) +{ + int ret, retrycount = NILFS_SC_CLEANUP_RETRY; + + /* The segctord thread was stopped and its timer was removed. + But some tasks remain. */ + do { + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; + + nilfs_transaction_lock(sbi, &ti, 0); + nilfs_segctor_accept(sci, &req); + ret = nilfs_segctor_construct(sci, &req); + nilfs_segctor_notify(sci, &req); + nilfs_transaction_unlock(sbi); + + } while (ret && retrycount-- > 0); +} + +/** + * nilfs_segctor_destroy - destroy the segment constructor. + * @sci: nilfs_sc_info + * + * nilfs_segctor_destroy() kills the segctord thread and frees + * the nilfs_sc_info struct. + * Caller must hold the segment semaphore. 
+ */ +static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + int flag; + + up_write(&sbi->s_nilfs->ns_segctor_sem); + + spin_lock(&sci->sc_state_lock); + nilfs_segctor_kill_thread(sci); + flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request + || sci->sc_seq_request != sci->sc_seq_done); + spin_unlock(&sci->sc_state_lock); + + if (flag || nilfs_segctor_confirm(sci)) + nilfs_segctor_write_out(sci); + + BUG_ON(!list_empty(&sci->sc_copied_buffers)); + + if (!list_empty(&sci->sc_dirty_files)) { + nilfs_warning(sbi->s_super, __func__, + "dirty file(s) after the final construction\n"); + nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); + } + if (!list_empty(&sci->sc_active_segments)) + nilfs_dispose_segment_list(&sci->sc_active_segments); + + if (!list_empty(&sci->sc_cleaning_segments)) + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); + + BUG_ON(!list_empty(&sci->sc_segbufs)); + + if (sci->sc_sketch_inode) { + iput(sci->sc_sketch_inode); + sci->sc_sketch_inode = NULL; + } + down_write(&sbi->s_nilfs->ns_segctor_sem); + + kfree(sci); +} + +/** + * nilfs_attach_segment_constructor - attach a segment constructor + * @sbi: nilfs_sb_info + * @ri: nilfs_recovery_info + * + * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, + * initilizes it, and starts the segment constructor. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + + /* Each field of nilfs_segctor is cleared through the initialization + of super-block info */ + sbi->s_sc_info = nilfs_segctor_new(sbi); + if (!sbi->s_sc_info) + return -ENOMEM; + + nilfs_attach_writer(nilfs, sbi); + err = nilfs_segctor_init(NILFS_SC(sbi), ri); + if (err) { + nilfs_detach_writer(nilfs, sbi); + kfree(sbi->s_sc_info); + sbi->s_sc_info = NULL; + } + return err; +} + +/** + * nilfs_detach_segment_constructor - destroy the segment constructor + * @sbi: nilfs_sb_info + * + * nilfs_detach_segment_constructor() kills the segment constructor daemon, + * frees the struct nilfs_sc_info, and destroy the dirty file list. + */ +void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + LIST_HEAD(garbage_list); + + down_write(&nilfs->ns_segctor_sem); + if (NILFS_SC(sbi)) { + nilfs_segctor_destroy(NILFS_SC(sbi)); + sbi->s_sc_info = NULL; + } + + /* Force to free the list of dirty files */ + spin_lock(&sbi->s_inode_lock); + if (!list_empty(&sbi->s_dirty_files)) { + list_splice_init(&sbi->s_dirty_files, &garbage_list); + nilfs_warning(sbi->s_super, __func__, + "Non empty dirty list after the last " + "segment construction\n"); + } + spin_unlock(&sbi->s_inode_lock); + up_write(&nilfs->ns_segctor_sem); + + nilfs_dispose_list(sbi, &garbage_list, 1); + nilfs_detach_writer(nilfs, sbi); +} -- cgit v1.2.3-70-g09d2 From f30bf3e40f80ef50c17f55271deae3abc03e793e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:38 -0700 Subject: nilfs2: fix missed-sync issue for do_sync_mapping_range() Chris Mason pointed out that there is a missed sync issue in nilfs_writepages(): On Wed, 17 Dec 2008 21:52:55 -0500, Chris Mason wrote: > It looks like nilfs_writepage ignores WB_SYNC_NONE, which is used by > do_sync_mapping_range(). 
where WB_SYNC_NONE in do_sync_mapping_range() was replaced with WB_SYNC_ALL by Nick's patch (commit: ee53a891f47444c53318b98dac947ede963db400). This fixes the problem by letting nilfs_writepages() write out the log of file data within the range if sync_mode is WB_SYNC_ALL. This involves removal of nilfs_file_aio_write() which was previously needed to ensure O_SYNC sync writes. Cc: Chris Mason Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/file.c | 27 ++---------- fs/nilfs2/inode.c | 16 ++++--- fs/nilfs2/segment.c | 120 +++++++++++++++++++++++++++++++--------------------- fs/nilfs2/segment.h | 11 ++++- 4 files changed, 93 insertions(+), 81 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 8031086db8d..cd38124372f 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -44,35 +44,14 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) return 0; if (datasync) - err = nilfs_construct_dsync_segment(inode->i_sb, inode); + err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, + LLONG_MAX); else err = nilfs_construct_segment(inode->i_sb); return err; } -static ssize_t -nilfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_dentry->d_inode; - ssize_t ret; - - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (ret <= 0) - return ret; - - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { - int err; - - err = nilfs_construct_dsync_segment(inode->i_sb, inode); - if (unlikely(err)) - return err; - } - return ret; -} - static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; @@ -160,7 +139,7 @@ struct file_operations nilfs_file_operations = { .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = nilfs_file_aio_write, + .aio_write = generic_file_aio_write, .ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b4697d9d7e5..289d1798dec 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "nilfs.h" #include "segment.h" #include "page.h" @@ -145,8 +146,14 @@ static int nilfs_readpages(struct file *file, struct address_space *mapping, static int nilfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - /* This empty method is required not to call generic_writepages() */ - return 0; + struct inode *inode = mapping->host; + int err = 0; + + if (wbc->sync_mode == WB_SYNC_ALL) + err = nilfs_construct_dsync_segment(inode->i_sb, inode, + wbc->range_start, + wbc->range_end); + return err; } static int nilfs_writepage(struct page *page, struct writeback_control *wbc) @@ -225,11 +232,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t size; - int err; - - err = nilfs_construct_dsync_segment(inode->i_sb, inode); - if (unlikely(err)) - return err; if (rw == WRITE) return 0; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2c4c088059f..ad65a737aff 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -654,29 +654,41 @@ struct nilfs_sc_operations nilfs_sc_dsync_ops = { .write_node_binfo = NULL, }; -static int nilfs_lookup_dirty_data_buffers(struct inode *inode, - 
struct list_head *listp, - struct nilfs_sc_info *sci) +static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, + struct list_head *listp, + size_t nlimit, + loff_t start, loff_t end) { - struct nilfs_segment_buffer *segbuf = sci->sc_curseg; struct address_space *mapping = inode->i_mapping; struct pagevec pvec; - unsigned i, ndirties = 0, nlimit; - pgoff_t index = 0; - int err = 0; + pgoff_t index = 0, last = ULONG_MAX; + size_t ndirties = 0; + int i; - nlimit = sci->sc_segbuf_nblocks - - (sci->sc_nblk_this_inc + segbuf->sb_sum.nblocks); + if (unlikely(start != 0 || end != LLONG_MAX)) { + /* + * A valid range is given for sync-ing data pages. The + * range is rounded to per-page; extra dirty buffers + * may be included if blocksize < pagesize. + */ + index = start >> PAGE_SHIFT; + last = end >> PAGE_SHIFT; + } pagevec_init(&pvec, 0); repeat: - if (!pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) - return 0; + if (unlikely(index > last) || + !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + min_t(pgoff_t, last - index, + PAGEVEC_SIZE - 1) + 1)) + return ndirties; for (i = 0; i < pagevec_count(&pvec); i++) { struct buffer_head *bh, *head; struct page *page = pvec.pages[i]; + if (unlikely(page->index > last)) + break; + if (mapping->host) { lock_page(page); if (!page_has_buffers(page)) @@ -687,24 +699,21 @@ static int nilfs_lookup_dirty_data_buffers(struct inode *inode, bh = head = page_buffers(page); do { - if (buffer_dirty(bh)) { - if (ndirties > nlimit) { - err = -E2BIG; - break; - } - get_bh(bh); - list_add_tail(&bh->b_assoc_buffers, listp); - ndirties++; + if (!buffer_dirty(bh)) + continue; + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, listp); + ndirties++; + if (unlikely(ndirties >= nlimit)) { + pagevec_release(&pvec); + cond_resched(); + return ndirties; } - bh = bh->b_this_page; - } while (bh != head); + } while (bh = bh->b_this_page, bh != head); } pagevec_release(&pvec); cond_resched(); - - if (!err) - goto repeat; - return err; + goto repeat; } static void nilfs_lookup_dirty_node_buffers(struct inode *inode, @@ -1058,23 +1067,31 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, return err; } +static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) +{ + /* Remaining number of blocks within segment buffer */ + return sci->sc_segbuf_nblocks - + (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); +} + static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, struct inode *inode, struct nilfs_sc_operations *sc_ops) { LIST_HEAD(data_buffers); LIST_HEAD(node_buffers); - int err, err2; + int err; if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { - err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, - sci); - if (err) { - err2 = nilfs_segctor_apply_buffers( + size_t n, rest = nilfs_segctor_buffer_rest(sci); + + n = nilfs_lookup_dirty_data_buffers( + inode, &data_buffers, rest + 1, 0, LLONG_MAX); + if (n > rest) { + err = nilfs_segctor_apply_buffers( sci, inode, &data_buffers, - err == -E2BIG ? 
sc_ops->collect_data : NULL); - if (err == -E2BIG) - err = err2; + sc_ops->collect_data); + BUG_ON(!err); /* always receive -E2BIG or true error */ goto break_or_fail; } } @@ -1114,16 +1131,20 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, struct inode *inode) { LIST_HEAD(data_buffers); - int err, err2; + size_t n, rest = nilfs_segctor_buffer_rest(sci); + int err; - err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, sci); - err2 = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, - (!err || err == -E2BIG) ? - nilfs_collect_file_data : NULL); - if (err == -E2BIG) - err = err2; - if (!err) + n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1, + sci->sc_dsync_start, + sci->sc_dsync_end); + + err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, + nilfs_collect_file_data); + if (!err) { nilfs_segctor_end_finfo(sci, inode); + BUG_ON(n > rest); + /* always receive -E2BIG or true error if n > rest */ + } return err; } @@ -1276,14 +1297,13 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) case NILFS_ST_DSYNC: dsync_mode: sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; - ii = sci->sc_stage.dirty_file_ptr; + ii = sci->sc_dsync_inode; if (!test_bit(NILFS_I_BUSY, &ii->i_state)) break; err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); if (unlikely(err)) break; - sci->sc_stage.dirty_file_ptr = NULL; sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; sci->sc_stage.scnt = NILFS_ST_DONE; return 0; @@ -2624,7 +2644,9 @@ int nilfs_construct_segment(struct super_block *sb) /** * nilfs_construct_dsync_segment - construct a data-only logical segment * @sb: super block - * @inode: the inode whose data blocks should be written out + * @inode: inode whose data blocks should be written out + * @start: start byte offset + * @end: end byte offset (inclusive) * * Return Value: On success, 0 is retured. On errors, one of the following * negative error code is returned. @@ -2639,8 +2661,8 @@ int nilfs_construct_segment(struct super_block *sb) * * %-ENOMEM - Insufficient memory available. */ -int nilfs_construct_dsync_segment(struct super_block *sb, - struct inode *inode) +int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, + loff_t start, loff_t end) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2671,7 +2693,9 @@ int nilfs_construct_dsync_segment(struct super_block *sb, return 0; } spin_unlock(&sbi->s_inode_lock); - sci->sc_stage.dirty_file_ptr = ii; + sci->sc_dsync_inode = ii; + sci->sc_dsync_start = start; + sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 615654b8c32..2dd39da9f38 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -93,6 +93,9 @@ struct nilfs_segsum_pointer { * @sc_active_segments: List of active segments that were already written out * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_dsync_inode: inode whose data pages are written for a sync operation + * @sc_dsync_start: start byte offset of data pages + * @sc_dsync_end: end byte offset of data pages (inclusive) * @sc_segbufs: List of segment buffers * @sc_segbuf_nblocks: Number of available blocks in segment buffers. 
* @sc_curseg: Current segment buffer @@ -134,6 +137,10 @@ struct nilfs_sc_info { struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; + struct nilfs_inode_info *sc_dsync_inode; + loff_t sc_dsync_start; + loff_t sc_dsync_end; + /* Segment buffers */ struct list_head sc_segbufs; unsigned long sc_segbuf_nblocks; @@ -221,8 +228,8 @@ extern void nilfs_destroy_transaction_cache(void); extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); -extern int nilfs_construct_dsync_segment(struct super_block *, - struct inode *); +extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, + loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, void __user *); -- cgit v1.2.3-70-g09d2 From 47420c799830d4676e544dbec56b2a7f787528f5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:45 -0700 Subject: nilfs2: avoid double error caused by nilfs_transaction_end Pekka Enberg pointed out that double error handlings found after nilfs_transaction_end() can be avoided by separating abort operation: OK, I don't understand this. The only way nilfs_transaction_end() can fail is if we have NILFS_TI_SYNC set and we fail to construct the segment. But why do we want to construct a segment if we don't commit? I guess what I'm asking is why don't we have a separate nilfs_transaction_abort() function that can't fail for the erroneous case to avoid this double error value tracking thing? This does the separation and renames nilfs_transaction_end() to nilfs_transaction_commit() for clarification. Since, some calls of these functions were used just for exclusion control against the segment constructor, they are replaced with semaphore operations. 
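To make the new calling convention concrete, here is a minimal sketch of a caller; nilfs_example_op() and do_update() are hypothetical names used only for illustration, while the transaction calls are the interface this separation establishes:

static int nilfs_example_op(struct inode *dir)
{
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
	if (err)
		return err;

	err = do_update(dir);	/* hypothetical file operation */
	if (!err)
		err = nilfs_transaction_commit(dir->i_sb);
	else
		nilfs_transaction_abort(dir->i_sb);	/* returns void; cannot fail */
	return err;
}

This is the pattern the hunks below apply throughout inode.c, ioctl.c, mdt.c and namei.c, and it removes the err/err2 double tracking that the old nilfs_transaction_end(sb, !err) interface forced on every caller.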
Acked-by: Pekka Enberg Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/inode.c | 23 +++++++++------- fs/nilfs2/ioctl.c | 58 +++++++++++++++++++++++----------------- fs/nilfs2/mdt.c | 5 +++- fs/nilfs2/namei.c | 74 +++++++++++++++++++++++++++++++++++---------------- fs/nilfs2/nilfs.h | 3 ++- fs/nilfs2/segment.c | 43 +++++++++++++++++++----------- fs/nilfs2/the_nilfs.h | 4 +-- 7 files changed, 135 insertions(+), 75 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 289d1798dec..4bf1e2c5bac 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -77,7 +77,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, goto out; err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, (unsigned long)bh_result); - nilfs_transaction_end(inode->i_sb, !err); if (unlikely(err != 0)) { if (err == -EEXIST) { /* @@ -100,8 +99,10 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, inode->i_ino); err = -EIO; } + nilfs_transaction_abort(inode->i_sb); goto out; } + nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed @@ -203,7 +204,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, err = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, nilfs_get_block); if (unlikely(err)) - nilfs_transaction_end(inode->i_sb, 0); + nilfs_transaction_abort(inode->i_sb); return err; } @@ -221,7 +222,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); - err = nilfs_transaction_end(inode->i_sb, 1); + err = nilfs_transaction_commit(inode->i_sb); return err ? : copied; } @@ -641,7 +642,7 @@ void nilfs_truncate(struct inode *inode) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); - nilfs_transaction_end(sb, 1); + nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But truncate has no return value. */ } @@ -669,7 +670,7 @@ void nilfs_delete_inode(struct inode *inode) /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - nilfs_transaction_end(sb, 1); + nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But delete_inode has no return value. */ } @@ -679,7 +680,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) struct nilfs_transaction_info ti; struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; - int err, err2; + int err; err = inode_change_ok(inode, iattr); if (err) @@ -691,8 +692,12 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) err = inode_setattr(inode, iattr); if (!err && (iattr->ia_valid & ATTR_MODE)) err = nilfs_acl_chmod(inode); - err2 = nilfs_transaction_end(sb, 1); - return err ? 
: err2; + if (likely(!err)) + err = nilfs_transaction_commit(sb); + else + nilfs_transaction_abort(sb); + + return err; } int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, @@ -817,5 +822,5 @@ void nilfs_dirty_inode(struct inode *inode) nilfs_transaction_begin(inode->i_sb, &ti, 0); if (likely(inode->i_ino != NILFS_SKETCH_INO)) nilfs_mark_inode_dirty(inode); - nilfs_transaction_end(inode->i_sb, 1); /* never fails */ + nilfs_transaction_commit(inode->i_sb); /* never fails */ } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 02e91e167ca..5ce06a01c7e 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -105,7 +105,11 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( cpfile, cpmode.cm_cno, cpmode.cm_mode); - nilfs_transaction_end(inode->i_sb, !ret); + if (unlikely(ret < 0)) { + nilfs_transaction_abort(inode->i_sb); + return ret; + } + nilfs_transaction_commit(inode->i_sb); /* never fails */ return ret; } @@ -125,7 +129,11 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); - nilfs_transaction_end(inode->i_sb, !ret); + if (unlikely(ret < 0)) { + nilfs_transaction_abort(inode->i_sb); + return ret; + } + nilfs_transaction_commit(inode->i_sb); /* never fails */ return ret; } @@ -142,16 +150,17 @@ static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_cpinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -161,14 +170,13 @@ static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_cpstat cpstat; - struct nilfs_transaction_info ti; int ret; - nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_cpfile_get_stat(cpfile, &cpstat); - nilfs_transaction_end(inode->i_sb, 0); + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); + up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -189,16 +197,17 @@ static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_suinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -208,14 +217,13 @@ static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_sustat(struct inode *inode, 
struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *sufile = NILFS_SB(inode->i_sb)->s_nilfs->ns_sufile; + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_sustat sustat; - struct nilfs_transaction_info ti; int ret; - nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_sufile_get_stat(sufile, &sustat); - nilfs_transaction_end(inode->i_sb, 0); + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); + up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -236,16 +244,17 @@ static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_vinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -280,16 +289,17 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 6ab84757861..e0a632b86fe 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -123,7 +123,10 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, brelse(bh); failed_unlock: - nilfs_transaction_end(sb, !err); + if (likely(!err)) + err = nilfs_transaction_commit(sb); + else + nilfs_transaction_abort(sb); if (writer) nilfs_put_writer(nilfs); out: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 95d1b29bff3..df70dadb336 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -109,7 +109,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) @@ -123,8 +123,12 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int @@ -132,7 +136,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (!new_valid_dev(rdev)) return -EINVAL; @@ -147,8 +151,12 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? 
: err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_symlink(struct inode *dir, struct dentry *dentry, @@ -158,7 +166,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, struct super_block *sb = dir->i_sb; unsigned l = strlen(symname)+1; struct inode *inode; - int err, err2; + int err; if (l > sb->s_blocksize) return -ENAMETOOLONG; @@ -184,8 +192,12 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, err = nilfs_add_nondir(dentry, inode); out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; out_fail: inode_dec_link_count(inode); @@ -198,7 +210,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, { struct inode *inode = old_dentry->d_inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (inode->i_nlink >= NILFS_LINK_MAX) return -EMLINK; @@ -212,15 +224,19 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, atomic_inc(&inode->i_count); err = nilfs_add_nondir(dentry, inode); - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (dir->i_nlink >= NILFS_LINK_MAX) return -EMLINK; @@ -252,8 +268,12 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; out_fail: inode_dec_link_count(inode); @@ -270,7 +290,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) struct nilfs_dir_entry *de; struct page *page; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) @@ -300,15 +320,19 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) inode_dec_link_count(inode); err = 0; out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) @@ -323,8 +347,12 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) inode_dec_link_count(dir); } } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? 
: err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -404,7 +432,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, inode_dec_link_count(old_dir); } - err = nilfs_transaction_end(old_dir->i_sb, 1); + err = nilfs_transaction_commit(old_dir->i_sb); return err; out_dir: @@ -416,7 +444,7 @@ out_old: kunmap(old_page); page_cache_release(old_page); out: - nilfs_transaction_end(old_dir->i_sb, 0); + nilfs_transaction_abort(old_dir->i_sb); return err; } diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 17458ad4a80..48c070676cc 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -166,7 +166,8 @@ struct nilfs_transaction_info { int nilfs_transaction_begin(struct super_block *, struct nilfs_transaction_info *, int); -int nilfs_transaction_end(struct super_block *, int); +int nilfs_transaction_commit(struct super_block *); +void nilfs_transaction_abort(struct super_block *); static inline void nilfs_set_transaction_flag(unsigned int flag) { diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ad65a737aff..6d66c5cb7b5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -163,8 +163,8 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) else { /* * If journal_info field is occupied by other FS, - * we save it and restore on nilfs_transaction_end(). - * But this should never happen. + * it is saved and will be restored on + * nilfs_transaction_commit(). */ printk(KERN_WARNING "NILFS warning: journal info from a different " @@ -195,7 +195,7 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * * nilfs_transaction_begin() acquires a reader/writer semaphore, called * the segment semaphore, to make a segment construction and write tasks - * exclusive. The function is used with nilfs_transaction_end() in pairs. + * exclusive. The function is used with nilfs_transaction_commit() in pairs. * The region enclosed by these two functions can be nested. To avoid a * deadlock, the semaphore is only acquired or released in the outermost call. * @@ -212,8 +212,6 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * * %-ENOMEM - Insufficient memory available. * - * %-ERESTARTSYS - Interrupted - * * %-ENOSPC - No space left on device */ int nilfs_transaction_begin(struct super_block *sb, @@ -248,16 +246,17 @@ int nilfs_transaction_begin(struct super_block *sb, } /** - * nilfs_transaction_end - end indivisible file operations. + * nilfs_transaction_commit - commit indivisible file operations. * @sb: super block - * @commit: commit flag (0 for no change) * - * nilfs_transaction_end() releases the read semaphore which is - * acquired by nilfs_transaction_begin(). Its releasing is only done - * in outermost call of this function. If the nilfs_transaction_info - * was allocated dynamically, it is given back to a slab cache. + * nilfs_transaction_commit() releases the read semaphore which is + * acquired by nilfs_transaction_begin(). This is only performed + * in outermost call of this function. If a commit flag is set, + * nilfs_transaction_commit() sets a timer to start the segment + * constructor. If a sync flag is set, it starts construction + * directly. 
*/ -int nilfs_transaction_end(struct super_block *sb, int commit) +int nilfs_transaction_commit(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_sb_info *sbi; @@ -265,9 +264,7 @@ int nilfs_transaction_end(struct super_block *sb, int commit) int err = 0; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); - - if (commit) - ti->ti_flags |= NILFS_TI_COMMIT; + ti->ti_flags |= NILFS_TI_COMMIT; if (ti->ti_count > 0) { ti->ti_count--; return 0; @@ -291,6 +288,22 @@ int nilfs_transaction_end(struct super_block *sb, int commit) return err; } +void nilfs_transaction_abort(struct super_block *sb) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + if (ti->ti_count > 0) { + ti->ti_count--; + return; + } + up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); + + current->journal_info = ti->ti_save; + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); +} + void nilfs_relax_pressure_in_lock(struct super_block *sb) { struct nilfs_sb_info *sbi = NILFS_SB(sb); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index dee8d83e054..9cd3c113f05 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -112,8 +112,8 @@ struct the_nilfs { /* * Following fields are dedicated to a writable FS-instance. * Except for the period seeking checkpoint, code outside the segment - * constructor must lock a segment semaphore with transaction_begin() - * and transaction_end(), when accessing these fields. + * constructor must lock a segment semaphore while accessing these + * fields. * The writable FS-instance is sole during a lifetime of the_nilfs. */ u64 ns_seg_seq; -- cgit v1.2.3-70-g09d2 From 1088dcf4c3a0a27fdad5214781d5084b11405238 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:51 -0700 Subject: nilfs2: remove timedwait ioctl command This removes NILFS_IOCTL_TIMEDWAIT command from ioctl interface along with the related flags and wait queue. The command is terrible because it just sleeps in the ioctl. I prefer to avoid this by devising means of event polling in userland program. By reconsidering the userland GC daemon, I found this is possible without changing behaviour of the daemon and sacrificing efficiency. 
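As a rough illustration of what such userland polling could look like (a sketch under assumptions, not the actual cleanerd code; nilfs_try_clean() is a hypothetical stand-in for whatever work the daemon issues through the remaining ioctls):

#include <time.h>

int nilfs_try_clean(void);	/* hypothetical helper, not part of nilfs */

/* Hypothetical cleaner loop: sleep and retry in userland instead of
 * blocking inside the removed NILFS_IOCTL_TIMEDWAIT command. */
static void cleaner_loop(long interval_sec)
{
	struct timespec ts = { .tv_sec = interval_sec, .tv_nsec = 0 };

	for (;;) {
		if (nilfs_try_clean() < 0)	/* stop on error */
			break;
		nanosleep(&ts, NULL);		/* poll again on the next cycle */
	}
}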
Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 95 +---------------------------------------------- fs/nilfs2/segment.c | 5 +-- fs/nilfs2/the_nilfs.c | 1 - fs/nilfs2/the_nilfs.h | 6 --- include/linux/nilfs2_fs.h | 22 ----------- 5 files changed, 2 insertions(+), 127 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9e4d9e64c8f..85a291ccc1b 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -578,62 +578,9 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - ret = nilfs_clean_segments(inode->i_sb, argp); - clear_nilfs_cond_nongc_write(NILFS_SB(inode->i_sb)->s_nilfs); - return ret; -} - -static int nilfs_ioctl_test_cond(struct the_nilfs *nilfs, int cond) -{ - return (cond & NILFS_TIMEDWAIT_SEG_WRITE) && - nilfs_cond_nongc_write(nilfs); -} - -static void nilfs_ioctl_clear_cond(struct the_nilfs *nilfs, int cond) -{ - if (cond & NILFS_TIMEDWAIT_SEG_WRITE) - clear_nilfs_cond_nongc_write(nilfs); -} - -static int nilfs_ioctl_timedwait(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_wait_cond wc; - long ret; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&wc, argp, sizeof(wc))) - return -EFAULT; - - unlock_kernel(); - ret = wc.wc_flags ? - wait_event_interruptible_timeout( - nilfs->ns_cleanerd_wq, - nilfs_ioctl_test_cond(nilfs, wc.wc_cond), - timespec_to_jiffies(&wc.wc_timeout)) : - wait_event_interruptible( - nilfs->ns_cleanerd_wq, - nilfs_ioctl_test_cond(nilfs, wc.wc_cond)); - lock_kernel(); - nilfs_ioctl_clear_cond(nilfs, wc.wc_cond); - - if (ret > 0) { - jiffies_to_timespec(ret, &wc.wc_timeout); - if (copy_to_user(argp, &wc, sizeof(wc))) - return -EFAULT; - return 0; - } - if (ret != 0) - return -EINTR; - - return wc.wc_flags ? 
-ETIME : 0; + return nilfs_clean_segments(inode->i_sb, argp); } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, @@ -679,8 +626,6 @@ int nilfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); case NILFS_IOCTL_CLEAN_SEGMENTS: return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); - case NILFS_IOCTL_TIMEDWAIT: - return nilfs_ioctl_timedwait(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); default: @@ -871,41 +816,6 @@ nilfs_compat_ioctl_clean_segments(struct inode *inode, struct file *filp, inode, filp, cmd, (unsigned long)uargv); } -static int -nilfs_compat_ioctl_timedwait(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_wait_cond __user *uwcond; - struct nilfs_wait_cond32 __user *uwcond32; - struct timespec ts; - int cond, flags, ret; - - uwcond = compat_alloc_user_space(sizeof(struct nilfs_wait_cond)); - uwcond32 = compat_ptr(arg); - if (get_user(cond, &uwcond32->wc_cond) || - put_user(cond, &uwcond->wc_cond) || - get_user(flags, &uwcond32->wc_flags) || - put_user(flags, &uwcond->wc_flags) || - get_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || - get_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec) || - put_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || - put_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec)) - return -EFAULT; - - ret = nilfs_compat_locked_ioctl(inode, filp, cmd, - (unsigned long)uwcond); - if (ret < 0) - return ret; - - if (get_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || - get_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec) || - put_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || - put_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec)) - return -EFAULT; - - return 0; -} - static int nilfs_compat_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -943,9 +853,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL32_CLEAN_SEGMENTS: return nilfs_compat_ioctl_clean_segments( inode, filp, NILFS_IOCTL_CLEAN_SEGMENTS, arg); - case NILFS_IOCTL32_TIMEDWAIT: - return nilfs_compat_ioctl_timedwait( - inode, filp, NILFS_IOCTL_TIMEDWAIT, arg); case NILFS_IOCTL_SYNC: return nilfs_compat_ioctl_sync(inode, filp, cmd, arg); default: diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6d66c5cb7b5..5db12d774a0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2114,11 +2114,8 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_drop_collected_inodes(&sci->sc_gc_inodes); if (update_sr) nilfs_commit_gcdat_inode(nilfs); - } else { + } else nilfs->ns_nongc_ctime = sci->sc_seg_ctime; - set_nilfs_cond_nongc_write(nilfs); - wake_up(&nilfs->ns_cleanerd_wq); - } sci->sc_nblk_inc += sci->sc_nblk_this_inc; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 852e0bf3a3c..69b62558622 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -73,7 +73,6 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_gc_inodes_h = NULL; INIT_LIST_HEAD(&nilfs->ns_used_segments); init_rwsem(&nilfs->ns_segctor_sem); - init_waitqueue_head(&nilfs->ns_cleanerd_wq); return nilfs; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 9cd3c113f05..75da3730696 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -37,7 +37,6 @@ enum { THE_NILFS_LOADED, /* Roll-back/roll-forward has done and the latest checkpoint was loaded */ THE_NILFS_DISCONTINUED, 
/* 'next' pointer chain has broken */ - THE_NILFS_COND_NONGC_WRITE, /* Condition to wake up cleanerd */ }; /** @@ -74,7 +73,6 @@ enum { * @ns_gc_dat: shadow inode of the DAT file inode for GC * @ns_gc_inodes: dummy inodes to keep live blocks * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks - * @ns_cleanerd_wq: wait queue for cleanerd * @ns_blocksize_bits: bit length of block size * @ns_nsegments: number of segments in filesystem * @ns_blocks_per_segment: number of blocks per segment @@ -151,9 +149,6 @@ struct the_nilfs { struct list_head ns_gc_inodes; struct hlist_head *ns_gc_inodes_h; - /* cleanerd */ - wait_queue_head_t ns_cleanerd_wq; - /* Disk layout information (static) */ unsigned int ns_blocksize_bits; unsigned long ns_nsegments; @@ -186,7 +181,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) -THE_NILFS_FNS(COND_NONGC_WRITE, cond_nongc_write) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct block_device *); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index e38fad2f7c0..b0a6b39eedb 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -763,18 +763,6 @@ struct nilfs_bdesc { __u32 bd_level; }; -#define NILFS_TIMEDWAIT_WRITE_LOCKED 0x1 -#define NILFS_TIMEDWAIT_SEG_WRITE 0x2 - -/** - * struct nilfs_wait_cond - - */ -struct nilfs_wait_cond { - int wc_cond; - int wc_flags; - struct timespec wc_timeout; -}; - #define NILFS_IOCTL_IDENT 'n' #define NILFS_IOCTL_CHANGE_CPMODE \ @@ -795,8 +783,6 @@ struct nilfs_wait_cond { _IOWR(NILFS_IOCTL_IDENT, 0x87, struct nilfs_argv) #define NILFS_IOCTL_CLEAN_SEGMENTS \ _IOW(NILFS_IOCTL_IDENT, 0x88, struct nilfs_argv[5]) -#define NILFS_IOCTL_TIMEDWAIT \ - _IOWR(NILFS_IOCTL_IDENT, 0x89, struct nilfs_wait_cond) #define NILFS_IOCTL_SYNC \ _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) #define NILFS_IOCTL_RESIZE \ @@ -827,12 +813,6 @@ struct nilfs_sustat32 { compat_time_t ss_nongc_ctime; }; -struct nilfs_wait_cond32 { - compat_int_t wc_cond; - compat_int_t wc_flags; - struct compat_timespec wc_timeout; -}; - #define NILFS_IOCTL32_CHANGE_CPMODE \ _IOW(NILFS_IOCTL_IDENT, 0x80, struct nilfs_cpmode32) #define NILFS_IOCTL32_GET_CPINFO \ @@ -847,8 +827,6 @@ struct nilfs_wait_cond32 { _IOWR(NILFS_IOCTL_IDENT, 0x87, struct nilfs_argv32) #define NILFS_IOCTL32_CLEAN_SEGMENTS \ _IOW(NILFS_IOCTL_IDENT, 0x88, struct nilfs_argv32[5]) -#define NILFS_IOCTL32_TIMEDWAIT \ - _IOWR(NILFS_IOCTL_IDENT, 0x89, struct nilfs_wait_cond32) #endif /* CONFIG_COMPAT */ #endif /* _LINUX_NILFS_FS_H */ -- cgit v1.2.3-70-g09d2 From 2c2e52fc4fca251e68f90821c9ff5cb18be4df58 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:54 -0700 Subject: nilfs2: extend nilfs_sustat ioctl struct This adds a new argument to the nilfs_sustat structure. The extended field allows to delete volatile active state of segments, which was needed to protect freshly-created segments from garbage collection but has confused code dealing with segments. This extension alleviates the mess and gives room for further simplifications. The volatile active flag is not persistent, so it's eliminable on this occasion without affecting compatibility other than the ioctl change. 
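To make the intended use concrete: a cleaner that knows a segment's sequence number can check it against ss_prot_seq before reclaiming the segment. The helper below is an illustrative sketch only (the function name and the way seg_seq is obtained are not part of this patch); it assumes the extended struct nilfs_sustat from include/linux/nilfs2_fs.h.

#include <linux/nilfs2_fs.h>

/*
 * ss_prot_seq is the least sequence number of segments that must not
 * be reclaimed, so only segments with a strictly smaller sequence
 * number are candidates for garbage collection.
 */
static inline int nilfs_segment_is_protected(const struct nilfs_sustat *sustat,
                                             __u64 seg_seq)
{
        return seg_seq >= sustat->ss_prot_seq;
}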
Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/recovery.c | 32 +++++++++++++++++--------------- fs/nilfs2/segment.c | 39 +++++++++------------------------------ fs/nilfs2/sufile.c | 8 ++++++-- fs/nilfs2/super.c | 4 +++- fs/nilfs2/the_nilfs.c | 18 ------------------ fs/nilfs2/the_nilfs.h | 5 ++--- include/linux/nilfs2_fs.h | 10 ++++------ 7 files changed, 41 insertions(+), 75 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 877dc1ba23f..a4253f34e13 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -416,6 +416,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, struct nilfs_segment_entry *ent, *n; struct inode *sufile = nilfs->ns_sufile; __u64 segnum[4]; + time_t mtime; int err; int i; @@ -442,9 +443,9 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, /* * Collecting segments written after the latest super root. - * These are marked volatile active, and won't be reallocated in - * the next construction. + * These are marked dirty to avoid being reallocated in the next write. */ + mtime = get_seconds(); list_for_each_entry_safe(ent, n, head, list) { if (ent->segnum == segnum[0]) { list_del(&ent->list); @@ -454,17 +455,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, err = nilfs_open_segment_entry(ent, sufile); if (unlikely(err)) goto failed; - if (nilfs_segment_usage_clean(ent->raw_su)) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - /* Keep it open */ - } else { - /* Removing duplicated entries */ - list_del(&ent->list); - nilfs_close_segment_entry(ent, sufile); - nilfs_free_segment_entry(ent); + if (!nilfs_segment_usage_dirty(ent->raw_su)) { + /* make the segment garbage */ + ent->raw_su->su_nblocks = cpu_to_le32(0); + ent->raw_su->su_lastmod = cpu_to_le32(mtime); + nilfs_segment_usage_set_dirty(ent->raw_su); } + list_del(&ent->list); + nilfs_close_segment_entry(ent, sufile); + nilfs_free_segment_entry(ent); } - list_splice_init(head, nilfs->ns_used_segments.prev); /* * The segment having the latest super root is active, and @@ -882,10 +882,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (scan_newer) ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; - else if (nilfs->ns_mount_state & NILFS_VALID_FS) - goto super_root_found; - - scan_newer = 1; + else { + nilfs->ns_prot_seq = ssi.seg_seq; + if (nilfs->ns_mount_state & NILFS_VALID_FS) + goto super_root_found; + scan_newer = 1; + } /* reset region for roll-forward */ pseg_start += ssi.nblocks; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 5db12d774a0..24d0fbd4271 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2229,13 +2229,6 @@ static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, nilfs_segment_usage_set_active(ent->raw_su); nilfs_close_segment_entry(ent, sufile); } - - down_write(&nilfs->ns_sem); - head = &nilfs->ns_used_segments; - list_for_each_entry(ent, head, list) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - } - up_write(&nilfs->ns_sem); } static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, @@ -2244,7 +2237,6 @@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, struct nilfs_segment_buffer *segbuf, *last; struct nilfs_segment_entry *ent; struct inode *sufile = nilfs->ns_sufile; - struct list_head *head; int err; last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); @@ -2265,22 +2257,13 
@@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, BUG_ON(!buffer_dirty(ent->bh_su)); } - head = &sci->sc_active_segments; - list_for_each_entry(ent, head, list) { + list_for_each_entry(ent, &sci->sc_active_segments, list) { err = nilfs_open_segment_entry(ent, sufile); if (unlikely(err)) goto failed; nilfs_segment_usage_clear_active(ent->raw_su); BUG_ON(!buffer_dirty(ent->bh_su)); } - - down_write(&nilfs->ns_sem); - head = &nilfs->ns_used_segments; - list_for_each_entry(ent, head, list) { - /* clear volatile active for segments of older generations */ - nilfs_segment_usage_clear_volatile_active(ent->raw_su); - } - up_write(&nilfs->ns_sem); return 0; failed: @@ -2304,19 +2287,15 @@ static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) } } -static void -__nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) - +static void nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { - struct nilfs_segment_entry *ent; - - list_splice_init(&sci->sc_active_segments, - nilfs->ns_used_segments.prev); + struct nilfs_segment_entry *ent, *n; - list_for_each_entry(ent, &nilfs->ns_used_segments, list) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - /* These segments are kept open */ + list_for_each_entry_safe(ent, n, &sci->sc_active_segments, list) { + list_del(&ent->list); + nilfs_close_segment_entry(ent, nilfs->ns_sufile); + nilfs_free_segment_entry(ent); } } @@ -2405,8 +2384,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (has_sr) { down_write(&nilfs->ns_sem); nilfs_update_last_segment(sbi, 1); - __nilfs_segctor_commit_deactivate_segments(sci, nilfs); up_write(&nilfs->ns_sem); + nilfs_segctor_commit_deactivate_segments(sci, nilfs); nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index b3674a8162a..cc714c72b13 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -446,6 +446,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; + struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; int ret; @@ -460,8 +461,11 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); - sustat->ss_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_ctime; - sustat->ss_nongc_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_nongc_ctime; + sustat->ss_ctime = nilfs->ns_ctime; + sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime; + spin_lock(&nilfs->ns_last_segment_lock); + sustat->ss_prot_seq = nilfs->ns_prot_seq; + spin_unlock(&nilfs->ns_last_segment_lock); kunmap_atomic(kaddr, KM_USER0); brelse(header_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 268b563d215..2f0e9f7bf15 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -262,8 +262,10 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi) printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); else { - nilfs_dispose_used_segments(nilfs); clear_nilfs_discontinued(nilfs); + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(nilfs->ns_sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); } return err; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c 
index 69b62558622..661ab762d76 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -71,7 +71,6 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) INIT_LIST_HEAD(&nilfs->ns_supers); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_gc_inodes_h = NULL; - INIT_LIST_HEAD(&nilfs->ns_used_segments); init_rwsem(&nilfs->ns_segctor_sem); return nilfs; @@ -95,7 +94,6 @@ void put_nilfs(struct the_nilfs *nilfs) */ might_sleep(); if (nilfs_loaded(nilfs)) { - nilfs_dispose_used_segments(nilfs); nilfs_mdt_clear(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_sufile); nilfs_mdt_clear(nilfs->ns_cpfile); @@ -463,22 +461,6 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) return err; } -void nilfs_dispose_used_segments(struct the_nilfs *nilfs) -{ - struct nilfs_segment_entry *ent, *n; - - /* nilfs->sem must be locked by the caller. */ - if (!nilfs_loaded(nilfs)) - return; - - list_for_each_entry_safe(ent, n, &nilfs->ns_used_segments, list) { - list_del_init(&ent->list); - nilfs_segment_usage_clear_volatile_active(ent->raw_su); - nilfs_close_segment_entry(ent, nilfs->ns_sufile); - nilfs_free_segment_entry(ent); - } -} - int nilfs_near_disk_full(struct the_nilfs *nilfs) { struct inode *sufile = nilfs->ns_sufile; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 75da3730696..af566e78f7a 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -51,7 +51,6 @@ enum { * @ns_writer_refcount: number of referrers on ns_writer * @ns_sbh: buffer head of the on-disk super block * @ns_sbp: pointer to the super block data - * @ns_used_segments: list of full segments in volatile active state * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. @@ -65,6 +64,7 @@ enum { * @ns_last_pseg: start block number of the latest segment * @ns_last_seq: sequence value of the latest segment * @ns_last_cno: checkpoint number of the latest segment + * @ns_prot_seq: least sequence number of segments which must not be reclaimed * @ns_free_segments_count: counter of free segments * @ns_segctor_sem: segment constructor semaphore * @ns_dat: DAT file inode @@ -103,7 +103,6 @@ struct the_nilfs { */ struct buffer_head *ns_sbh; struct nilfs_super_block *ns_sbp; - struct list_head ns_used_segments; unsigned ns_mount_state; struct list_head ns_supers; @@ -132,6 +131,7 @@ struct the_nilfs { sector_t ns_last_pseg; u64 ns_last_seq; __u64 ns_last_cno; + u64 ns_prot_seq; unsigned long ns_free_segments_count; struct rw_semaphore ns_segctor_sem; @@ -188,7 +188,6 @@ void put_nilfs(struct the_nilfs *); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); -void nilfs_dispose_used_segments(struct the_nilfs *); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 306c446e694..aa93f0ee29d 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -565,8 +565,6 @@ enum { NILFS_SEGMENT_USAGE_DIRTY, NILFS_SEGMENT_USAGE_ERROR, - /* on-memory only */ - NILFS_SEGMENT_USAGE_VOLATILE_ACTIVE, /* ... 
*/ }; @@ -594,7 +592,6 @@ nilfs_segment_usage_##name(const struct nilfs_segment_usage *su) \ NILFS_SEGMENT_USAGE_FNS(ACTIVE, active) NILFS_SEGMENT_USAGE_FNS(DIRTY, dirty) NILFS_SEGMENT_USAGE_FNS(ERROR, error) -NILFS_SEGMENT_USAGE_FNS(VOLATILE_ACTIVE, volatile_active) static inline void nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su) @@ -650,7 +647,6 @@ nilfs_suinfo_##name(const struct nilfs_suinfo *si) \ NILFS_SUINFO_FNS(ACTIVE, active) NILFS_SUINFO_FNS(DIRTY, dirty) NILFS_SUINFO_FNS(ERROR, error) -NILFS_SUINFO_FNS(VOLATILE_ACTIVE, volatile_active) static inline int nilfs_suinfo_clean(const struct nilfs_suinfo *si) { @@ -717,8 +713,9 @@ struct nilfs_cpstat { * @ss_nsegs: number of segments * @ss_ncleansegs: number of clean segments * @ss_ndirtysegs: number of dirty segments - * @ss_ctime: - * @ss_nongc_ctime: + * @ss_ctime: creation time of the last segment + * @ss_nongc_ctime: creation time of the last segment not for GC + * @ss_prot_seq: least sequence number of segments which must not be reclaimed */ struct nilfs_sustat { __u64 ss_nsegs; @@ -726,6 +723,7 @@ struct nilfs_sustat { __u64 ss_ndirtysegs; __u64 ss_ctime; __u64 ss_nongc_ctime; + __u64 ss_prot_seq; }; /** -- cgit v1.2.3-70-g09d2 From 1f5abe7e7dbcd83e73212c6cb135a6106cea6a0b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:55 -0700 Subject: nilfs2: replace BUG_ON and BUG calls triggerable from ioctl Pekka Enberg advised me: > It would be nice if BUG(), BUG_ON(), and panic() calls would be > converted to proper error handling using WARN_ON() calls. The BUG() > call in nilfs_cpfile_delete_checkpoints(), for example, looks to be > triggerable from user-space via the ioctl() system call. This will follow the comment and keep them to a minimum. Acked-by: Pekka Enberg Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 27 +++++++----------- fs/nilfs2/cpfile.c | 38 +++++++++++++------------ fs/nilfs2/dat.c | 15 +++++----- fs/nilfs2/direct.c | 13 +++++++-- fs/nilfs2/inode.c | 19 ++++--------- fs/nilfs2/ioctl.c | 63 ++++++++++++++++++++++++++++-------------- fs/nilfs2/mdt.c | 4 +-- fs/nilfs2/nilfs.h | 1 - fs/nilfs2/page.c | 10 +++---- fs/nilfs2/recovery.c | 3 -- fs/nilfs2/segment.c | 78 ++++++++++++++++------------------------------------ fs/nilfs2/sufile.c | 25 ++++++++++------- fs/nilfs2/super.c | 8 ++++-- 13 files changed, 144 insertions(+), 160 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 53f0d4c31cb..6b37a276729 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -425,7 +425,6 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, index++; out: - BUG_ON(indexp == NULL); *indexp = index; return s == 0; @@ -477,8 +476,6 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, __u64 ptr; int level, index, found, ret; - BUG_ON(minlevel <= NILFS_BTREE_LEVEL_DATA); - node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(btree, node); if ((level < minlevel) || @@ -505,7 +502,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, if (index < nilfs_btree_node_nchildren_max(btree, node)) ptr = nilfs_btree_node_get_ptr(btree, node, index); else { - BUG_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); + WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; } @@ -1366,7 +1363,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, } else { /* no siblings */ /* 
the only child of the root node */ - BUG_ON(level != nilfs_btree_height(btree) - 2); + WARN_ON(level != nilfs_btree_height(btree) - 2); if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; @@ -1543,7 +1540,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, break; case 3: nchildren = nilfs_btree_node_get_nchildren(btree, root); - BUG_ON(nchildren > 1); + WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); ret = nilfs_bmap_get_block(bmap, ptr, &bh); if (ret < 0) @@ -1552,7 +1549,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, break; default: node = NULL; - BUG(); + return -EINVAL; } nchildren = nilfs_btree_node_get_nchildren(btree, node); @@ -1833,14 +1830,13 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) { - BUG_ON(buffer_nilfs_volatile(path[level].bp_bh)); + WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); ret = nilfs_btree_prepare_update_v(btree, path, level); if (ret < 0) goto out; } /* success */ - BUG_ON(maxlevelp == NULL); *maxlevelp = level - 1; return 0; @@ -1909,7 +1905,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, __u64 key; int level, ret; - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(btree); @@ -1928,12 +1924,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); if (ret < 0) { - /* BUG_ON(ret == -ENOENT); */ - if (ret == -ENOENT) { + if (unlikely(ret == -ENOENT)) printk(KERN_CRIT "%s: key = %llu, level == %d\n", __func__, (unsigned long long)key, level); - BUG(); - } goto out; } @@ -2117,7 +2110,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } @@ -2175,12 +2168,12 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 218b3441850..e90b60dfced 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -40,10 +40,7 @@ nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile) static unsigned long nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) { - __u64 tcno; - - BUG_ON(cno == 0); /* checkpoint number 0 is invalid */ - tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; + __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); return (unsigned long)tcno; } @@ -96,7 +93,7 @@ nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); unsigned int count; - BUG_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); + WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); count = le32_to_cpu(cp->cp_checkpoints_count) - n; cp->cp_checkpoints_count = cpu_to_le32(count); return count; @@ -178,6 +175,8 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, * %-ENOMEM - Insufficient amount 
of memory available. * * %-ENOENT - No such checkpoint. + * + * %-EINVAL - invalid checkpoint. */ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, __u64 cno, @@ -191,8 +190,9 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, void *kaddr; int ret; - BUG_ON(cno < 1 || cno > nilfs_mdt_cno(cpfile) || - (cno < nilfs_mdt_cno(cpfile) && create)); + if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || + (cno < nilfs_mdt_cno(cpfile) && create))) + return -EINVAL; down_write(&NILFS_MDT(cpfile)->mi_sem); @@ -288,12 +288,11 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, unsigned long tnicps; int ret, ncps, nicps, count, i; - if ((start == 0) || (start > end)) { - printk(KERN_CRIT "%s: start = %llu, end = %llu\n", - __func__, - (unsigned long long)start, - (unsigned long long)end); - BUG(); + if (unlikely(start == 0 || start > end)) { + printk(KERN_ERR "%s: invalid range of checkpoint numbers: " + "[%llu, %llu)\n", __func__, + (unsigned long long)start, (unsigned long long)end); + return -EINVAL; } /* cannot delete the latest checkpoint */ @@ -323,7 +322,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - BUG_ON(nilfs_checkpoint_snapshot(cp)); + WARN_ON(nilfs_checkpoint_snapshot(cp)); if (!nilfs_checkpoint_invalid(cp)) { nilfs_checkpoint_set_invalid(cp); nicps++; @@ -393,6 +392,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, int n, ret; int ncps, i; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_read(&NILFS_MDT(cpfile)->mi_sem); for (n = 0; cno < cur_cno && n < nci; cno += ncps) { @@ -532,9 +533,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) ssize_t nci; int ret; - /* checkpoint number 0 is invalid */ - if (cno == 0) - return -ENOENT; nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); if (nci < 0) return nci; @@ -582,6 +580,8 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); @@ -698,6 +698,8 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); @@ -813,6 +815,8 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_read(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9360920f7d3..bb8a5818e7f 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -135,7 +135,7 @@ int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } @@ -157,7 +157,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end), (unsigned long long)le64_to_cpu(entry->de_blocknr)); - BUG(); } entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); @@ -180,7 +179,7 @@ int nilfs_dat_prepare_end(struct inode *dat, 
struct nilfs_palloc_req *req) ret = nilfs_dat_prepare_entry(dat, req, 0); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } @@ -216,7 +215,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, end = start = le64_to_cpu(entry->de_start); if (!dead) { end = nilfs_mdt_cno(dat); - BUG_ON(start > end); + WARN_ON(start > end); } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); @@ -324,14 +323,16 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) return ret; kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); - if (entry->de_blocknr == cpu_to_le64(0)) { + if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - BUG(); + kunmap_atomic(kaddr, KM_USER0); + brelse(entry_bh); + return -EINVAL; } - BUG_ON(blocknr == 0); + WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index e3ec2485008..c6379e48278 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -210,7 +210,6 @@ static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) if (lastkey == NILFS_DIRECT_KEY_MAX + 1) return -ENOENT; - BUG_ON(keyp == NULL); *keyp = lastkey; return 0; @@ -366,9 +365,17 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, direct = (struct nilfs_direct *)bmap; key = nilfs_bmap_data_get_key(bmap, *bh); - BUG_ON(key > NILFS_DIRECT_KEY_MAX); + if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { + printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, + (unsigned long long)key); + return -EINVAL; + } ptr = nilfs_direct_get_ptr(direct, key); - BUG_ON(ptr == NILFS_BMAP_INVALID_PTR); + if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { + printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, + (unsigned long long)ptr); + return -EINVAL; + } return direct->d_ops->dop_assign(direct, key, ptr, bh, blocknr, binfo); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4bf1e2c5bac..b6536bb2a32 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -61,12 +61,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, map_bh(bh_result, inode->i_sb, blknum); goto out; } - if (unlikely(ret == 1)) { - printk(KERN_ERR "nilfs_get_block: bmap_lookup returns " - "buffer_head pointer (blkoff=%llu, blknum=%lu)\n", - (unsigned long long)blkoff, blknum); - BUG(); - } /* data block was not found */ if (ret == -ENOENT && create) { struct nilfs_transaction_info ti; @@ -85,14 +79,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * However, the page having this block must * be locked in this case. */ - printk(KERN_ERR + printk(KERN_WARNING "nilfs_get_block: a race condition " "while inserting a data block. 
" "(inode number=%lu, file block " "offset=%llu)\n", inode->i_ino, (unsigned long long)blkoff); - BUG(); + err = 0; } else if (err == -EINVAL) { nilfs_error(inode->i_sb, __func__, "broken bmap (inode=%lu)\n", @@ -621,7 +615,6 @@ void nilfs_truncate(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - int ret; if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; @@ -630,8 +623,7 @@ void nilfs_truncate(struct inode *inode) blocksize = sb->s_blocksize; blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; - ret = nilfs_transaction_begin(sb, &ti, 0); - BUG_ON(ret); + nilfs_transaction_begin(sb, &ti, 0); /* never fails */ block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); @@ -652,7 +644,6 @@ void nilfs_delete_inode(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - int err; if (unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) @@ -660,8 +651,8 @@ void nilfs_delete_inode(struct inode *inode) clear_inode(inode); return; } - err = nilfs_transaction_begin(sb, &ti, 0); - BUG_ON(err); + nilfs_transaction_begin(sb, &ti, 0); /* never fails */ + if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index cfb27892ffe..108d281ebca 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -489,14 +489,14 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, ret = nilfs_mdt_mark_block_dirty(dat, bdescs[i].bd_offset); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } } else { ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset, bdescs[i].bd_level); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } } @@ -519,7 +519,8 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); int ret; - BUG_ON(!sbi); + if (unlikely(!sbi)) + return -EROFS; ret = nilfs_segctor_add_segments_to_be_freed( NILFS_SC(sbi), buf, nmembs); nilfs_put_writer(nilfs); @@ -539,6 +540,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, void __user *argp) { struct nilfs_argv argv[5]; + const char *msg; int dir, ret; if (copy_from_user(argv, argp, sizeof(argv))) @@ -546,31 +548,50 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, dir = _IOC_WRITE; ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); - if (ret < 0) - goto out_move_blks; + if (ret < 0) { + msg = "cannot read source blocks"; + goto failed; + } ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); - if (ret < 0) - goto out_del_cps; + if (ret < 0) { + /* + * can safely abort because checkpoints can be removed + * independently. + */ + msg = "cannot delete checkpoints"; + goto failed; + } ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); - if (ret < 0) - goto out_free_vbns; + if (ret < 0) { + /* + * can safely abort because DAT file is updated atomically + * using a copy-on-write technique. + */ + msg = "cannot delete virtual blocks from DAT file"; + goto failed; + } ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); - if (ret < 0) - goto out_free_vbns; + if (ret < 0) { + /* + * can safely abort because the operation is nondestructive. 
+ */ + msg = "cannot mark copying blocks dirty"; + goto failed; + } ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); - if (ret < 0) - goto out_free_segs; - + if (ret < 0) { + /* + * can safely abort because this operation is atomic. + */ + msg = "cannot set segments to be freed"; + goto failed; + } return 0; - out_free_segs: - BUG(); /* XXX: not implemented yet */ - out_free_vbns: - BUG();/* XXX: not implemented yet */ - out_del_cps: - BUG();/* XXX: not implemented yet */ - out_move_blks: + failed: nilfs_remove_all_gcinode(nilfs); + printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n", + msg, ret); return ret; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index e0a632b86fe..47dd815433f 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -154,10 +154,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, ret = -EBUSY; goto failed_bh; } - } else { - BUG_ON(mode != READ); + } else /* mode == READ */ lock_buffer(bh); - } if (buffer_uptodate(bh)) { unlock_buffer(bh); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index d08fb1ce501..a7f5bc724e3 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -173,7 +173,6 @@ static inline void nilfs_set_transaction_flag(unsigned int flag) { struct nilfs_transaction_info *ti = current->journal_info; - BUG_ON(!ti); ti->ti_flags |= flag; } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 7b18be8cd47..1bfbba9c0e9 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -417,7 +417,7 @@ repeat: dpage = find_lock_page(dmap, offset); if (dpage) { /* override existing page on the destination cache */ - BUG_ON(PageDirty(dpage)); + WARN_ON(PageDirty(dpage)); nilfs_copy_page(dpage, page, 0); unlock_page(dpage); page_cache_release(dpage); @@ -427,17 +427,15 @@ repeat: /* move the page to the destination cache */ spin_lock_irq(&smap->tree_lock); page2 = radix_tree_delete(&smap->page_tree, offset); - if (unlikely(page2 != page)) - NILFS_PAGE_BUG(page, "page removal failed " - "(offset=%lu, page2=%p)", - offset, page2); + WARN_ON(page2 != page); + smap->nrpages--; spin_unlock_irq(&smap->tree_lock); spin_lock_irq(&dmap->tree_lock); err = radix_tree_insert(&dmap->page_tree, offset, page); if (unlikely(err < 0)) { - BUG_ON(err == -EEXIST); + WARN_ON(err == -EEXIST); page->mapping = NULL; page_cache_release(page); /* for cache */ } else { diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index a4253f34e13..ef387b19682 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -92,9 +92,6 @@ static int nilfs_warn_segment_error(int err) printk(KERN_WARNING "NILFS warning: No super root in the last segment\n"); break; - case NILFS_SEG_VALID: - default: - BUG(); } return -EINVAL; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 24d0fbd4271..9a87410985b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -334,8 +334,7 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, { struct nilfs_transaction_info *cur_ti = current->journal_info; - BUG_ON(cur_ti); - BUG_ON(!ti); + WARN_ON(cur_ti); ti->ti_flags = NILFS_TI_WRITER; ti->ti_count = 0; ti->ti_save = cur_ti; @@ -546,8 +545,6 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci, { int err; - /* BUG_ON(!buffer_dirty(bh)); */ - /* excluded by scan_dirty_data_buffers() */ err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); if (unlikely(err < 0)) return nilfs_handle_bmap_error(err, __func__, inode, @@ -566,8 +563,6 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci, { int err; - /* BUG_ON(!buffer_dirty(bh)); 
*/ - /* excluded by scan_dirty_node_buffers() */ err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); if (unlikely(err < 0)) return nilfs_handle_bmap_error(err, __func__, inode, @@ -579,7 +574,7 @@ static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); } @@ -628,7 +623,7 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_dat)); } @@ -862,9 +857,9 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else { - BUG_ON(err == -EINVAL || err == -ENOENT); - } + } else + WARN_ON(err == -EINVAL || err == -ENOENT); + return err; } @@ -879,7 +874,7 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, &raw_cp, &bh_cp); if (unlikely(err)) { - BUG_ON(err == -EINVAL || err == -ENOENT); + WARN_ON(err == -EINVAL || err == -ENOENT); goto failed_ibh; } raw_cp->cp_snapshot_list.ssl_next = 0; @@ -944,7 +939,6 @@ static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) (struct nilfs_super_root *)bh_sr->b_data; u32 crc; - BUG_ON(NILFS_SR_BYTES > bh_sr->b_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); @@ -1022,8 +1016,7 @@ static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, if (!(ent->flags & NILFS_SLH_FREED)) break; err = nilfs_sufile_cancel_free(sufile, ent->segnum); - BUG_ON(err); - + WARN_ON(err); /* do not happen */ ent->flags &= ~NILFS_SLH_FREED; } } @@ -1472,7 +1465,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, failed: list_for_each_entry_safe(segbuf, n, &list, sb_list) { ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ list_del_init(&segbuf->sb_list); nilfs_segbuf_free(segbuf); } @@ -1488,7 +1481,7 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ } if (segbuf->sb_io_error) { /* Case 1: The first segment failed */ @@ -1504,7 +1497,7 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ if (!done && segbuf->sb_io_error) { if (segbuf->sb_segnum != nilfs->ns_nextnum) /* Case 2: extended segment (!= next) failed */ @@ -1558,7 +1551,7 @@ static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed because bh_su is dirty */ + WARN_ON(ret); /* always succeed because bh_su is dirty */ live_blocks = segbuf->sb_sum.nblocks + (segbuf->sb_pseg_start - 
segbuf->sb_fseg_start); raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); @@ -1579,7 +1572,7 @@ static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed because bh_su is dirty */ + WARN_ON(ret); /* always succeed because bh_su is dirty */ raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - segbuf->sb_fseg_start); nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); @@ -1587,7 +1580,7 @@ static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed */ + WARN_ON(ret); /* always succeed */ raw_su->su_nblocks = 0; nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); @@ -1606,7 +1599,7 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, list_del_init(&segbuf->sb_list); sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); nilfs_segbuf_free(segbuf); } } @@ -1923,7 +1916,6 @@ static int nilfs_page_has_uncleared_buffer(struct page *page) static void __nilfs_end_page_io(struct page *page, int err) { - /* BUG_ON(err > 0); */ if (!err) { if (!nilfs_page_buffers_clean(page)) __set_page_dirty_nobuffers(page); @@ -2262,7 +2254,7 @@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, if (unlikely(err)) goto failed; nilfs_segment_usage_clear_active(ent->raw_su); - BUG_ON(!buffer_dirty(ent->bh_su)); + WARN_ON(!buffer_dirty(ent->bh_su)); } return 0; @@ -2340,7 +2332,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { - BUG_ON(mode == SC_LSEG_SR); nilfs_segctor_end_construction(sci, nilfs, 1); goto out; } @@ -2479,9 +2470,8 @@ int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, struct inode *sufile = nilfs->ns_sufile; LIST_HEAD(list); __u64 *pnum; - const char *flag_name; size_t i; - int err, err2 = 0; + int err; for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { ent = nilfs_alloc_segment_entry(*pnum); @@ -2495,32 +2485,12 @@ int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, if (unlikely(err)) goto failed; - if (unlikely(le32_to_cpu(ent->raw_su->su_flags) != - (1UL << NILFS_SEGMENT_USAGE_DIRTY))) { - if (nilfs_segment_usage_clean(ent->raw_su)) - flag_name = "clean"; - else if (nilfs_segment_usage_active(ent->raw_su)) - flag_name = "active"; - else if (nilfs_segment_usage_volatile_active( - ent->raw_su)) - flag_name = "volatile active"; - else if (!nilfs_segment_usage_dirty(ent->raw_su)) - flag_name = "non-dirty"; - else - flag_name = "erroneous"; - - printk(KERN_ERR - "NILFS: %s segment is requested to be cleaned " - "(segnum=%llu)\n", - flag_name, (unsigned long long)ent->segnum); - err2 = -EINVAL; - } + if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su))) + printk(KERN_WARNING "NILFS: unused segment is " + "requested to be cleaned (segnum=%llu)\n", + (unsigned long long)ent->segnum); nilfs_close_segment_entry(ent, sufile); } - if (unlikely(err2)) { - err = err2; - goto failed; - } list_splice(&list, sci->sc_cleaning_segments.prev); return 0; @@ -2705,8 +2675,6 @@ struct nilfs_segctor_req { static void 
nilfs_segctor_accept(struct nilfs_sc_info *sci, struct nilfs_segctor_req *req) { - BUG_ON(!sci); - req->sc_err = req->sb_err = 0; spin_lock(&sci->sc_state_lock); req->seq_accepted = sci->sc_seq_request; @@ -3107,7 +3075,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (flag || nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); - BUG_ON(!list_empty(&sci->sc_copied_buffers)); + WARN_ON(!list_empty(&sci->sc_copied_buffers)); if (!list_empty(&sci->sc_dirty_files)) { nilfs_warning(sbi->s_super, __func__, @@ -3120,7 +3088,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (!list_empty(&sci->sc_cleaning_segments)) nilfs_dispose_segment_list(&sci->sc_cleaning_segments); - BUG_ON(!list_empty(&sci->sc_segbufs)); + WARN_ON(!list_empty(&sci->sc_segbufs)); if (sci->sc_sketch_inode) { iput(sci->sc_sketch_inode); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index cc714c72b13..4cf47e03a3a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -231,10 +231,11 @@ int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); - if (!nilfs_segment_usage_clean(su)) { - printk(KERN_CRIT "%s: segment %llu must be clean\n", + if (unlikely(!nilfs_segment_usage_clean(su))) { + printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); - BUG(); + kunmap_atomic(kaddr, KM_USER0); + goto out_su_bh; } nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); @@ -249,11 +250,10 @@ int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); + out_su_bh: brelse(su_bh); - out_header: brelse(header_bh); - out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; @@ -317,7 +317,7 @@ int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum[i], su_bh[i], kaddr); - BUG_ON(nilfs_segment_usage_error(su)); + WARN_ON(nilfs_segment_usage_error(su)); nilfs_segment_usage_set_clean(su); kunmap_atomic(kaddr, KM_USER0); nilfs_mdt_mark_buffer_dirty(su_bh[i]); @@ -385,8 +385,8 @@ int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, int ret; /* segnum is 0 origin */ - BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); - + if (segnum >= nilfs_sufile_get_nsegments(sufile)) + return -EINVAL; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); if (ret < 0) @@ -515,6 +515,8 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. 
*/ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) { @@ -524,8 +526,11 @@ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) void *kaddr; int ret; - BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); - + if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING "%s: invalid segment number: %llu\n", + __func__, (unsigned long long)segnum); + return -EINVAL; + } down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 2f0e9f7bf15..d0639a6aae9 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -841,8 +841,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, if (sb->s_flags & MS_RDONLY) { if (nilfs_test_opt(sbi, SNAPSHOT)) { - if (!nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, - sbi->s_snapshot_cno)) { + err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, + sbi->s_snapshot_cno); + if (err < 0) + goto failed_sbi; + if (!err) { printk(KERN_ERR "NILFS: The specified checkpoint is " "not a snapshot " @@ -1163,7 +1166,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, } else { struct nilfs_sb_info *sbi = NILFS_SB(s); - BUG_ON(!sbi || !sbi->s_nilfs); /* * s_umount protects super_block from unmount process; * It covers pointers of nilfs_sb_info and the_nilfs. -- cgit v1.2.3-70-g09d2 From 458c5b0822a669d170fdb7bb16c9145f652ebe06 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:56 -0700 Subject: nilfs2: clean up sketch file The sketch file is a file to mark checkpoints with user data. It was experimentally introduced in the original implementation, and now obsolete. The file was handled differently with regular files; the file size got truncated when a checkpoint was created. This stops the special treatment and will treat it as a regular file. Most users are not affected because mkfs.nilfs2 no longer makes this file. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/nilfs2.txt | 2 -- fs/nilfs2/inode.c | 35 ++------------------------ fs/nilfs2/segment.c | 49 +----------------------------------- fs/nilfs2/segment.h | 8 ------ include/linux/nilfs2_fs.h | 2 -- 5 files changed, 3 insertions(+), 93 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 3367fc44388..55c4300abfc 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -161,8 +161,6 @@ the following meta data files: 4) Data address translation file -- Maps virtual block numbers to usual (DAT) block numbers. This file serves to make on-disk blocks relocatable. 
- 5) Sketch file (sketch) -- Keeps read-only data which can be - associated with checkpoints (optional) The following figure shows a typical organization of the logs: diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b6536bb2a32..a1922b17662 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -418,30 +418,6 @@ int nilfs_read_inode_common(struct inode *inode, return 0; } -static int nilfs_read_sketch_inode(struct inode *inode) -{ - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); - int err = 0; - - if (sbi->s_snapshot_cno) { - struct the_nilfs *nilfs = sbi->s_nilfs; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - - err = nilfs_cpfile_get_checkpoint( - nilfs->ns_cpfile, sbi->s_snapshot_cno, 0, &raw_cp, - &bh_cp); - if (likely(!err)) { - if (!nilfs_checkpoint_sketch(raw_cp)) - inode->i_size = 0; - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, sbi->s_snapshot_cno, bh_cp); - } - inode->i_flags |= S_NOCMTIME; - } - return err; -} - static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, struct inode *inode) { @@ -469,11 +445,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; - if (unlikely(inode->i_ino == NILFS_SKETCH_INO)) { - err = nilfs_read_sketch_inode(inode); - if (unlikely(err)) - goto failed_unmap; - } } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &nilfs_dir_inode_operations; inode->i_fop = &nilfs_dir_operations; @@ -742,8 +713,7 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); - if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state) || - unlikely(inode->i_ino == NILFS_SKETCH_INO)) + if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) return 0; spin_lock(&sbi->s_inode_lock); @@ -811,7 +781,6 @@ void nilfs_dirty_inode(struct inode *inode) return; } nilfs_transaction_begin(inode->i_sb, &ti, 0); - if (likely(inode->i_ino != NILFS_SKETCH_INO)) - nilfs_mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9a87410985b..981c34a0cd6 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -67,7 +67,6 @@ enum { NILFS_ST_INIT = 0, NILFS_ST_GC, /* Collecting dirty blocks for GC */ NILFS_ST_FILE, - NILFS_ST_SKETCH, NILFS_ST_IFILE, NILFS_ST_CPFILE, NILFS_ST_SUFILE, @@ -887,8 +886,7 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); - if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0) - nilfs_checkpoint_set_sketch(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -923,11 +921,6 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, nilfs_fill_in_file_bmap(ifile, ii); set_bit(NILFS_I_COLLECTED, &ii->i_state); } - if (sci->sc_sketch_inode) { - ii = NILFS_I(sci->sc_sketch_inode); - if (test_bit(NILFS_I_DIRTY, &ii->i_state)) - nilfs_fill_in_file_bmap(ifile, ii); - } } /* @@ -1228,26 +1221,6 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.scnt = NILFS_ST_DONE; return 0; } - sci->sc_stage.scnt++; /* Fall through */ - case NILFS_ST_SKETCH: - if 
(mode == SC_LSEG_SR && sci->sc_sketch_inode) { - ii = NILFS_I(sci->sc_sketch_inode); - if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { - sci->sc_sketch_inode->i_ctime.tv_sec - = sci->sc_seg_ctime; - sci->sc_sketch_inode->i_mtime.tv_sec - = sci->sc_seg_ctime; - err = nilfs_mark_inode_dirty( - sci->sc_sketch_inode); - if (unlikely(err)) - goto break_or_fail; - } - err = nilfs_segctor_scan_file(sci, - sci->sc_sketch_inode, - &nilfs_sc_file_ops); - if (unlikely(err)) - goto break_or_fail; - } sci->sc_stage.scnt++; sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ @@ -2385,13 +2358,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) } while (sci->sc_stage.scnt != NILFS_ST_DONE); - /* Clearing sketch data */ - if (has_sr && sci->sc_sketch_inode) { - if (i_size_read(sci->sc_sketch_inode) == 0) - clear_bit(NILFS_I_DIRTY, - &NILFS_I(sci->sc_sketch_inode)->i_state); - i_size_write(sci->sc_sketch_inode, 0); - } out: nilfs_segctor_destroy_segment_buffers(sci); nilfs_segctor_check_out_files(sci, sbi); @@ -2971,11 +2937,6 @@ static int nilfs_segctor_init(struct nilfs_sc_info *sci, struct nilfs_recovery_info *ri) { int err; - struct inode *inode = nilfs_iget(sci->sc_super, NILFS_SKETCH_INO); - - sci->sc_sketch_inode = IS_ERR(inode) ? NULL : inode; - if (sci->sc_sketch_inode) - i_size_write(sci->sc_sketch_inode, 0); sci->sc_seq_done = sci->sc_seq_request; if (ri) @@ -2987,10 +2948,6 @@ static int nilfs_segctor_init(struct nilfs_sc_info *sci, if (ri) list_splice_init(&sci->sc_active_segments, ri->ri_used_segments.prev); - if (sci->sc_sketch_inode) { - iput(sci->sc_sketch_inode); - sci->sc_sketch_inode = NULL; - } } return err; } @@ -3090,10 +3047,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) WARN_ON(!list_empty(&sci->sc_segbufs)); - if (sci->sc_sketch_inode) { - iput(sci->sc_sketch_inode); - sci->sc_sketch_inode = NULL; - } down_write(&sbi->s_nilfs->ns_segctor_sem); kfree(sci); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 2dd39da9f38..fbd162d7170 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -108,7 +108,6 @@ struct nilfs_segsum_pointer { * @sc_nblk_this_inc: Number of blocks included in the current logical segment * @sc_seg_ctime: Creation time * @sc_flags: Internal flags - * @sc_sketch_inode: Inode of the sketch file * @sc_state_lock: spinlock for sc_state and so on * @sc_state: Segctord state flags * @sc_flush_request: inode bitmap of metadata files to be flushed @@ -158,13 +157,6 @@ struct nilfs_sc_info { unsigned long sc_flags; - /* - * Pointer to an inode of the sketch. - * This pointer is kept only while it contains data. - * We protect it with a semaphore of the segment constructor. 
- */ - struct inode *sc_sketch_inode; - spinlock_t sc_state_lock; unsigned long sc_state; unsigned long sc_flush_request; diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index aa93f0ee29d..e9c84aa4a8e 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -494,7 +494,6 @@ nilfs_checkpoint_##name(const struct nilfs_checkpoint *cp) \ NILFS_CHECKPOINT_FNS(SNAPSHOT, snapshot) NILFS_CHECKPOINT_FNS(INVALID, invalid) -NILFS_CHECKPOINT_FNS(SKETCH, sketch) /** * struct nilfs_cpinfo - checkpoint information @@ -527,7 +526,6 @@ nilfs_cpinfo_##name(const struct nilfs_cpinfo *cpinfo) \ NILFS_CPINFO_FNS(SNAPSHOT, snapshot) NILFS_CPINFO_FNS(INVALID, invalid) -NILFS_CPINFO_FNS(SKETCH, sketch) /** -- cgit v1.2.3-70-g09d2 From c96fa464a567a2a8796009af0e79bc68af73f485 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:57 -0700 Subject: nilfs2: mark minor flag for checkpoint created by internal operation Nilfs creates checkpoints even for garbage collection or metadata updates such as checkpoint mode change. So, user often sees checkpoints created only by such internal operations. This is inconvenient in some situations. For example, application that monitors checkpoints and changes them to snapshots, will fall into an infinite loop because it cannot distinguish internally created checkpoints. This patch solves this sort of problem by adding a flag to checkpoint for identification. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 9 +++++++++ fs/nilfs2/segment.h | 3 +++ include/linux/nilfs2_fs.h | 3 +++ 3 files changed, 15 insertions(+) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 981c34a0cd6..2879704509f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -462,6 +462,9 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, sci->sc_binfo_ptr = sci->sc_finfo_ptr; nilfs_segctor_map_segsum_entry( sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); + + if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); /* skip finfo */ } @@ -887,6 +890,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); + if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + nilfs_checkpoint_clear_minor(raw_cp); + else + nilfs_checkpoint_set_minor(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -2091,6 +2099,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno); + clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); } else diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index fbd162d7170..bb7d417fec6 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -185,6 +185,9 @@ enum { NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */ NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a checkpoint */ + NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files + other than DAT, cpfile, sufile, or files + moved by GC */ }; /* sc_state */ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 
e9c84aa4a8e..cbce6647f7f 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -470,6 +470,7 @@ enum { NILFS_CHECKPOINT_SNAPSHOT, NILFS_CHECKPOINT_INVALID, NILFS_CHECKPOINT_SKETCH, + NILFS_CHECKPOINT_MINOR, }; #define NILFS_CHECKPOINT_FNS(flag, name) \ @@ -494,6 +495,7 @@ nilfs_checkpoint_##name(const struct nilfs_checkpoint *cp) \ NILFS_CHECKPOINT_FNS(SNAPSHOT, snapshot) NILFS_CHECKPOINT_FNS(INVALID, invalid) +NILFS_CHECKPOINT_FNS(MINOR, minor) /** * struct nilfs_cpinfo - checkpoint information @@ -526,6 +528,7 @@ nilfs_cpinfo_##name(const struct nilfs_cpinfo *cpinfo) \ NILFS_CPINFO_FNS(SNAPSHOT, snapshot) NILFS_CPINFO_FNS(INVALID, invalid) +NILFS_CPINFO_FNS(MINOR, minor) /** -- cgit v1.2.3-70-g09d2 From cece552074c591970353ad48308d65f110aeaf28 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:58 -0700 Subject: nilfs2: simplify handling of active state of segments will reduce some lines of segment constructor. Previously, the state was complexly controlled through a list of segments in order to keep consistency in meta data of usage state of segments. Instead, this presents ``calculated'' active flags to userland cleaner program and stop maintaining its real flag on disk. Only by this fake flag, the cleaner cannot exactly know if each segment is reclaimable or not. However, the recent extension of nilfs_sustat ioctl struct (nilfs2-extend-nilfs_sustat-ioctl-struct.patch) can prevent the cleaner from reclaiming in-use segment wrongly. So, now I can apply this for simplification. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/recovery.c | 12 +--- fs/nilfs2/segbuf.c | 24 +------ fs/nilfs2/segbuf.h | 6 +- fs/nilfs2/segment.c | 173 ++++---------------------------------------------- fs/nilfs2/segment.h | 5 +- fs/nilfs2/sufile.c | 8 ++- fs/nilfs2/super.c | 4 +- fs/nilfs2/the_nilfs.h | 5 ++ 8 files changed, 29 insertions(+), 208 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ef387b19682..6ab4c8fc5e9 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -463,16 +463,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, nilfs_free_segment_entry(ent); } - /* - * The segment having the latest super root is active, and - * should be deactivated on the next construction for recovery. 
- */ - err = -ENOMEM; - ent = nilfs_alloc_segment_entry(segnum[0]); - if (unlikely(!ent)) - goto failed; - list_add_tail(&ent->list, &ri->ri_used_segments); - /* Allocate new segments for recovery */ err = nilfs_sufile_alloc(sufile, &segnum[0]); if (unlikely(err)) @@ -757,7 +747,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; } - err = nilfs_attach_segment_constructor(sbi, ri); + err = nilfs_attach_segment_constructor(sbi); if (unlikely(err)) goto failed; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 3d3ea8351f6..1e68821b4a9 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -64,27 +64,17 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); - segbuf->sb_segent = NULL; return segbuf; } void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf) { - struct nilfs_segment_entry *ent = segbuf->sb_segent; - - if (ent != NULL && list_empty(&ent->list)) { - /* free isolated segment list head */ - nilfs_free_segment_entry(segbuf->sb_segent); - segbuf->sb_segent = NULL; - } kmem_cache_free(nilfs_segbuf_cachep, segbuf); } -int nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, +void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, unsigned long offset, struct the_nilfs *nilfs) { - struct nilfs_segment_entry *ent; - segbuf->sb_segnum = segnum; nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start, &segbuf->sb_fseg_end); @@ -92,18 +82,6 @@ int nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset; segbuf->sb_rest_blocks = segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; - - /* Attach a segment list head */ - ent = segbuf->sb_segent; - if (ent == NULL) { - segbuf->sb_segent = nilfs_alloc_segment_entry(segnum); - if (unlikely(!segbuf->sb_segent)) - return -ENOMEM; - } else { - BUG_ON(ent->bh_su || !list_empty(&ent->list)); - ent->segnum = segnum; - } - return 0; } void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 25f2a5faa48..0c3076f4e59 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -68,7 +68,6 @@ struct nilfs_segsum_info { * struct nilfs_segment_buffer - Segment buffer * @sb_super: back pointer to a superblock struct * @sb_list: List head to chain this structure - * @sb_segent: Pointer for attaching a segment entry * @sb_sum: On-memory segment summary * @sb_segnum: Index number of the full segment * @sb_nextnum: Index number of the next full segment @@ -83,7 +82,6 @@ struct nilfs_segsum_info { struct nilfs_segment_buffer { struct super_block *sb_super; struct list_head sb_list; - struct nilfs_segment_entry *sb_segent; /* Segment information */ struct nilfs_segsum_info sb_sum; @@ -125,8 +123,8 @@ int __init nilfs_init_segbuf_cache(void); void nilfs_destroy_segbuf_cache(void); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); -int nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, - struct the_nilfs *); +void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, + struct the_nilfs *); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c 
index 2879704509f..e43558d50e7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1304,25 +1304,6 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } -static int nilfs_segctor_terminate_segment(struct nilfs_sc_info *sci, - struct nilfs_segment_buffer *segbuf, - struct inode *sufile) -{ - struct nilfs_segment_entry *ent = segbuf->sb_segent; - int err; - - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - return err; - nilfs_mdt_mark_buffer_dirty(ent->bh_su); - nilfs_mdt_mark_dirty(sufile); - nilfs_close_segment_entry(ent, sufile); - - list_add_tail(&ent->list, &sci->sc_active_segments); - segbuf->sb_segent = NULL; - return 0; -} - static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) { struct buffer_head *bh_su; @@ -1342,7 +1323,6 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf, *n; - struct inode *sufile = nilfs->ns_sufile; __u64 nextnum; int err; @@ -1354,28 +1334,22 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, } else segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, - nilfs->ns_pseg_offset, nilfs); - if (unlikely(err)) - return err; + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, + nilfs); if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { - err = nilfs_segctor_terminate_segment(sci, segbuf, sufile); - if (unlikely(err)) - return err; - nilfs_shift_to_next_segment(nilfs); - err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); } sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; - err = nilfs_touch_segusage(sufile, segbuf->sb_segnum); + err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); if (unlikely(err)) return err; if (nilfs->ns_segnum == nilfs->ns_nextnum) { /* Start from the head of a new full segment */ - err = nilfs_sufile_alloc(sufile, &nextnum); + err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); if (unlikely(err)) return err; } else @@ -1390,7 +1364,7 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, list_del_init(&segbuf->sb_list); nilfs_segbuf_free(segbuf); } - return err; + return 0; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, @@ -1421,10 +1395,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, goto failed; /* map this buffer to region of segment on-disk */ - err = nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); - if (unlikely(err)) - goto failed_segbuf; - + nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; /* allocate the next next full segment */ @@ -2177,102 +2148,6 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, spin_unlock(&sbi->s_inode_lock); } -/* - * Nasty routines to manipulate active flags on sufile. - * These would be removed in a future release. 
- */ -static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent, *n; - struct inode *sufile = nilfs->ns_sufile; - struct list_head *head; - - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - nilfs_segment_usage_set_active(ent->raw_su); - nilfs_close_segment_entry(ent, sufile); - } - - head = &sci->sc_active_segments; - list_for_each_entry_safe(ent, n, head, list) { - nilfs_segment_usage_set_active(ent->raw_su); - nilfs_close_segment_entry(ent, sufile); - } -} - -static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent; - struct inode *sufile = nilfs->ns_sufile; - int err; - - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - /* - * Deactivate ongoing full segments. The last segment is kept - * active because it is a start point of recovery, and is not - * relocatable until the super block points to a newer - * checkpoint. - */ - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - nilfs_segment_usage_clear_active(ent->raw_su); - BUG_ON(!buffer_dirty(ent->bh_su)); - } - - list_for_each_entry(ent, &sci->sc_active_segments, list) { - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - nilfs_segment_usage_clear_active(ent->raw_su); - WARN_ON(!buffer_dirty(ent->bh_su)); - } - return 0; - - failed: - nilfs_segctor_reactivate_segments(sci, nilfs); - return err; -} - -static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent; - - /* move each segbuf->sb_segent to the list of used active segments */ - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - list_add_tail(&ent->list, &sci->sc_active_segments); - segbuf->sb_segent = NULL; - } -} - -static void nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_entry *ent, *n; - - list_for_each_entry_safe(ent, n, &sci->sc_active_segments, list) { - list_del(&ent->list); - nilfs_close_segment_entry(ent, nilfs->ns_sufile); - nilfs_free_segment_entry(ent); - } -} - /* * Main procedure of segment constructor */ @@ -2322,11 +2197,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) goto failed; - if (has_sr) { - err = nilfs_segctor_deactivate_segments(sci, nilfs); - if (unlikely(err)) - goto failed; - } if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); @@ -2353,12 +2223,10 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_complete_write(sci); /* Commit segments */ - nilfs_segctor_bead_completed_segments(sci); if (has_sr) { down_write(&nilfs->ns_sem); nilfs_update_last_segment(sbi, 1); up_write(&nilfs->ns_sem); - nilfs_segctor_commit_deactivate_segments(sci, nilfs); 
nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } @@ -2379,8 +2247,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) failed_to_make_up: if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_redirty_inodes(&sci->sc_dirty_files); - if (has_sr) - nilfs_segctor_reactivate_segments(sci, nilfs); failed: if (nilfs_doing_gc()) @@ -2942,23 +2808,11 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) } } -static int nilfs_segctor_init(struct nilfs_sc_info *sci, - struct nilfs_recovery_info *ri) +static int nilfs_segctor_init(struct nilfs_sc_info *sci) { - int err; - sci->sc_seq_done = sci->sc_seq_request; - if (ri) - list_splice_init(&ri->ri_used_segments, - sci->sc_active_segments.prev); - err = nilfs_segctor_start_thread(sci); - if (err) { - if (ri) - list_splice_init(&sci->sc_active_segments, - ri->ri_used_segments.prev); - } - return err; + return nilfs_segctor_start_thread(sci); } /* @@ -2982,7 +2836,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_active_segments); INIT_LIST_HEAD(&sci->sc_cleaning_segments); INIT_LIST_HEAD(&sci->sc_copied_buffers); @@ -3048,8 +2901,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) "dirty file(s) after the final construction\n"); nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); } - if (!list_empty(&sci->sc_active_segments)) - nilfs_dispose_segment_list(&sci->sc_active_segments); if (!list_empty(&sci->sc_cleaning_segments)) nilfs_dispose_segment_list(&sci->sc_cleaning_segments); @@ -3064,7 +2915,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) /** * nilfs_attach_segment_constructor - attach a segment constructor * @sbi: nilfs_sb_info - * @ri: nilfs_recovery_info * * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, * initilizes it, and starts the segment constructor. @@ -3074,8 +2924,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * * %-ENOMEM - Insufficient memory available. 
*/ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, - struct nilfs_recovery_info *ri) +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; @@ -3087,7 +2936,7 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, return -ENOMEM; nilfs_attach_writer(nilfs, sbi); - err = nilfs_segctor_init(NILFS_SC(sbi), ri); + err = nilfs_segctor_init(NILFS_SC(sbi)); if (err) { nilfs_detach_writer(nilfs, sbi); kfree(sbi->s_sc_info); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index bb7d417fec6..4a64eb82f1f 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -90,7 +90,6 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_active_segments: List of active segments that were already written out * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -132,7 +131,6 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_active_segments; struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; @@ -232,8 +230,7 @@ extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); -extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *, - struct nilfs_recovery_info *); +extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); /* recovery.c */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 4cf47e03a3a..c774cf397e2 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -158,7 +158,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) if (!nilfs_segment_usage_clean(su)) continue; /* found a clean segment */ - nilfs_segment_usage_set_active(su); nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); @@ -591,6 +590,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh; struct nilfs_segment_usage *su; size_t susz = NILFS_MDT(sufile)->mi_entry_size; + struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -623,7 +623,11 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, for (j = 0; j < n; j++, su = (void *)su + susz) { si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); - si[i + j].sui_flags = le32_to_cpu(su->su_flags); + si[i + j].sui_flags = le32_to_cpu(su->su_flags) & + ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); + if (nilfs_segment_is_active(nilfs, segnum + i + j)) + si[i + j].sui_flags |= + (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_atomic(kaddr, KM_USER0); brelse(su_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index b7519c327ba..ef31e9a51c8 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -868,7 +868,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi, NULL); + err = nilfs_attach_segment_constructor(sbi); if (err) goto failed_checkpoint; } @@ -1001,7 +1001,7 @@ static int 
nilfs_remount(struct super_block *sb, int *flags, char *data) nilfs_clear_opt(sbi, SNAPSHOT); sbi->s_snapshot_cno = 0; - err = nilfs_attach_segment_constructor(sbi, NULL); + err = nilfs_attach_segment_constructor(sbi); if (err) goto rw_remount_failed; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index af566e78f7a..d750e48257c 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -280,4 +280,9 @@ static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs) return cno; } +static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) +{ + return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; +} + #endif /* _THE_NILFS_H */ -- cgit v1.2.3-70-g09d2 From e339ad31f59925b48a92ee3947692fdf9758b8c7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:59 -0700 Subject: nilfs2: introduce secondary super block The former versions didn't have extra super blocks. This improves the weak point by introducing another super block at unused region in tail of the partition. This doesn't break disk format compatibility; older versions just ingore the secondary super block, and new versions just recover it if it doesn't exist. The partition created by an old mkfs may not have unused region, but in that case, the secondary super block will not be added. This doesn't make more redundant copies of the super block; it is a future work. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/nilfs.h | 7 +- fs/nilfs2/recovery.c | 1 - fs/nilfs2/segment.c | 8 +- fs/nilfs2/segment.h | 2 - fs/nilfs2/super.c | 229 +++++++++++++++++++--------------------------- fs/nilfs2/the_nilfs.c | 180 +++++++++++++++++++++++++++++++----- fs/nilfs2/the_nilfs.h | 18 +++- include/linux/nilfs2_fs.h | 4 + 8 files changed, 274 insertions(+), 175 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a7f5bc724e3..19af5ab8627 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -275,13 +275,10 @@ extern void nilfs_error(struct super_block *, const char *, const char *, ...) extern void nilfs_warning(struct super_block *, const char *, const char *, ...) 
__attribute__ ((format (printf, 3, 4))); extern struct nilfs_super_block * -nilfs_load_super_block(struct super_block *, struct buffer_head **); -extern struct nilfs_super_block * -nilfs_reload_super_block(struct super_block *, struct buffer_head **, int); +nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, struct nilfs_super_block *, char *); -extern void nilfs_update_last_segment(struct nilfs_sb_info *, int); -extern int nilfs_commit_super(struct nilfs_sb_info *); +extern int nilfs_commit_super(struct nilfs_sb_info *, int); extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ab4c8fc5e9..6ade0963fc1 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -870,7 +870,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (scan_newer) ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; else { - nilfs->ns_prot_seq = ssi.seg_seq; if (nilfs->ns_mount_state & NILFS_VALID_FS) goto super_root_found; scan_newer = 1; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e43558d50e7..fb70ec3be20 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2068,7 +2068,8 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) if (update_sr) { nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, - segbuf->sb_sum.seg_seq, nilfs->ns_cno); + segbuf->sb_sum.seg_seq, nilfs->ns_cno++); + sbi->s_super->s_dirt = 1; clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); @@ -2224,9 +2225,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Commit segments */ if (has_sr) { - down_write(&nilfs->ns_sem); - nilfs_update_last_segment(sbi, 1); - up_write(&nilfs->ns_sem); nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } @@ -2564,7 +2562,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && nilfs_discontinued(nilfs)) { down_write(&nilfs->ns_sem); - req->sb_err = nilfs_commit_super(sbi); + req->sb_err = nilfs_commit_super(sbi, 0); up_write(&nilfs->ns_sem); } } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 4a64eb82f1f..a98fc1ed0bb 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -206,8 +206,6 @@ enum { logical segment with a super root */ #define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root creation */ -#define NILFS_SC_DEFAULT_SB_FREQ 30 /* Minimum interval of periodical - update of superblock (reserved) */ /* * The default threshold amount of data, in block counts. 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ef31e9a51c8..e2ced824c62 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -103,8 +103,9 @@ void nilfs_error(struct super_block *sb, const char *function, down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; - nilfs->ns_sbp->s_state |= cpu_to_le16(NILFS_ERROR_FS); - nilfs_commit_super(sbi); + nilfs->ns_sbp[0]->s_state |= + cpu_to_le16(NILFS_ERROR_FS); + nilfs_commit_super(sbi, 1); } up_write(&nilfs->ns_sem); @@ -208,90 +209,106 @@ static void nilfs_clear_inode(struct inode *inode) nilfs_btnode_cache_clear(&ii->i_btnode_cache); } -/** - * nilfs_update_last_segment - change pointer to the latest segment - * @sbi: nilfs_sb_info - * @update_cno: flag whether to update checkpoint number. - * - * nilfs_update_last_segment() changes information in the super block - * after a partial segment is written out successfully. The super - * block is marked dirty. It will be written out at the next VFS sync - * operations such as sync_supers() and generic_shutdown_super(). - */ -void nilfs_update_last_segment(struct nilfs_sb_info *sbi, int update_cno) -{ - struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; - - /* nilfs->sem must be locked by the caller. */ - spin_lock(&nilfs->ns_last_segment_lock); - if (update_cno) - nilfs->ns_last_cno = nilfs->ns_cno++; - sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); - sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); - sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); - spin_unlock(&nilfs->ns_last_segment_lock); - - sbi->s_super->s_dirt = 1; /* must be set if delaying the call of - nilfs_commit_super() */ -} - -static int nilfs_sync_super(struct nilfs_sb_info *sbi) +static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; int barrier_done = 0; if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh); + set_buffer_ordered(nilfs->ns_sbh[0]); barrier_done = 1; } retry: - set_buffer_dirty(nilfs->ns_sbh); - err = sync_dirty_buffer(nilfs->ns_sbh); + set_buffer_dirty(nilfs->ns_sbh[0]); + err = sync_dirty_buffer(nilfs->ns_sbh[0]); if (err == -EOPNOTSUPP && barrier_done) { nilfs_warning(sbi->s_super, __func__, "barrier-based sync failed. " "disabling barriers\n"); nilfs_clear_opt(sbi, BARRIER); barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh); + clear_buffer_ordered(nilfs->ns_sbh[0]); goto retry; } - if (unlikely(err)) + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); - else { + if (err == -EIO && nilfs->ns_sbh[1]) { + nilfs_fall_back_super_block(nilfs); + goto retry; + } + } else { + struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; + + /* + * The latest segment becomes trailable from the position + * written in superblock. 
+ */ clear_nilfs_discontinued(nilfs); - spin_lock(&nilfs->ns_last_segment_lock); - nilfs->ns_prot_seq = le64_to_cpu(nilfs->ns_sbp->s_last_seq); - spin_unlock(&nilfs->ns_last_segment_lock); + + /* update GC protection for recent segments */ + if (nilfs->ns_sbh[1]) { + sbp = NULL; + if (dupsb) { + set_buffer_dirty(nilfs->ns_sbh[1]); + if (!sync_dirty_buffer(nilfs->ns_sbh[1])) + sbp = nilfs->ns_sbp[1]; + } + } + if (sbp) { + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); + } } return err; } -int nilfs_commit_super(struct nilfs_sb_info *sbi) +int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) { struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; + struct nilfs_super_block **sbp = nilfs->ns_sbp; sector_t nfreeblocks; + time_t t; int err; /* nilfs->sem must be locked by the caller. */ + if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { + if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) + nilfs_swap_super_block(nilfs); + else { + printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", + sbi->s_super->s_id); + return -EIO; + } + } err = nilfs_count_free_blocks(nilfs, &nfreeblocks); if (unlikely(err)) { printk(KERN_ERR "NILFS: failed to count free blocks\n"); return err; } - sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); - sbp->s_wtime = cpu_to_le64(get_seconds()); - sbp->s_sum = 0; - sbp->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, - (unsigned char *)sbp, - le16_to_cpu(sbp->s_bytes))); + spin_lock(&nilfs->ns_last_segment_lock); + sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); + sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); + sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); + spin_unlock(&nilfs->ns_last_segment_lock); + + t = get_seconds(); + nilfs->ns_sbwtime[0] = t; + sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); + sbp[0]->s_wtime = cpu_to_le64(t); + sbp[0]->s_sum = 0; + sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, + (unsigned char *)sbp[0], + nilfs->ns_sbsize)); + if (dupsb && sbp[1]) { + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + nilfs->ns_sbwtime[1] = t; + } sbi->s_super->s_dirt = 0; - return nilfs_sync_super(sbi); + return nilfs_sync_super(sbi, dupsb); } static void nilfs_put_super(struct super_block *sb) @@ -303,8 +320,8 @@ static void nilfs_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs->ns_sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); - nilfs_commit_super(sbi); + nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } @@ -330,7 +347,7 @@ static void nilfs_put_super(struct super_block *sb) * 2. down_write(&nilfs->ns_sem) * * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer - * of the super block (nilfs->ns_sbp). + * of the super block (nilfs->ns_sbp[]). * * In most cases, VFS functions call lock_super() before calling these * methods. 
So we must be careful not to bring on deadlocks when using @@ -346,8 +363,19 @@ static void nilfs_write_super(struct super_block *sb) struct the_nilfs *nilfs = sbi->s_nilfs; down_write(&nilfs->ns_sem); - if (!(sb->s_flags & MS_RDONLY)) - nilfs_commit_super(sbi); + if (!(sb->s_flags & MS_RDONLY)) { + struct nilfs_super_block **sbp = nilfs->ns_sbp; + u64 t = get_seconds(); + int dupsb; + + if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && + t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { + up_write(&nilfs->ns_sem); + return; + } + dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; + nilfs_commit_super(sbi, dupsb); + } sb->s_dirt = 0; up_write(&nilfs->ns_sem); } @@ -436,7 +464,7 @@ static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { nilfs->ns_mount_state |= NILFS_VALID_FS; - err = nilfs_commit_super(sbi); + err = nilfs_commit_super(sbi, 1); if (likely(!err)) printk(KERN_INFO "NILFS: recovery complete.\n"); } @@ -652,7 +680,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, static int nilfs_setup_super(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; + struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); int mnt_count = le16_to_cpu(sbp->s_mnt_count); @@ -674,88 +702,29 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); sbp->s_mtime = cpu_to_le64(get_seconds()); - return nilfs_commit_super(sbi); + return nilfs_commit_super(sbi, 1); } -struct nilfs_super_block * -nilfs_load_super_block(struct super_block *sb, struct buffer_head **pbh) +struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, + u64 pos, int blocksize, + struct buffer_head **pbh) { - int blocksize; - unsigned long offset, sb_index; - - /* - * Adjusting block size - * Blocksize will be enlarged when it is smaller than hardware - * sector size. - * Disk format of superblock does not change. 
- */ - blocksize = sb_min_blocksize(sb, BLOCK_SIZE); - if (!blocksize) { - printk(KERN_ERR - "NILFS: unable to set blocksize of superblock\n"); - return NULL; - } - sb_index = NILFS_SB_OFFSET_BYTES / blocksize; - offset = NILFS_SB_OFFSET_BYTES % blocksize; + unsigned long long sb_index = pos; + unsigned long offset; + offset = do_div(sb_index, blocksize); *pbh = sb_bread(sb, sb_index); - if (!*pbh) { - printk(KERN_ERR "NILFS: unable to read superblock\n"); + if (!*pbh) return NULL; - } return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -struct nilfs_super_block * -nilfs_reload_super_block(struct super_block *sb, struct buffer_head **pbh, - int blocksize) -{ - struct nilfs_super_block *sbp; - unsigned long offset, sb_index; - int hw_blocksize = bdev_hardsect_size(sb->s_bdev); - - if (blocksize < hw_blocksize) { - printk(KERN_ERR - "NILFS: blocksize %d too small for device " - "(sector-size = %d).\n", - blocksize, hw_blocksize); - goto failed_sbh; - } - brelse(*pbh); - sb_set_blocksize(sb, blocksize); - - sb_index = NILFS_SB_OFFSET_BYTES / blocksize; - offset = NILFS_SB_OFFSET_BYTES % blocksize; - - *pbh = sb_bread(sb, sb_index); - if (!*pbh) { - printk(KERN_ERR - "NILFS: cannot read superblock on 2nd try.\n"); - goto failed; - } - - sbp = (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); - if (sbp->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { - printk(KERN_ERR - "NILFS: !? Magic mismatch on 2nd try.\n"); - goto failed_sbh; - } - return sbp; - - failed_sbh: - brelse(*pbh); - - failed: - return NULL; -} - int nilfs_store_magic_and_option(struct super_block *sb, struct nilfs_super_block *sbp, char *data) { struct nilfs_sb_info *sbi = NILFS_SB(sb); - /* trying to fill super (1st stage) */ sb->s_magic = le16_to_cpu(sbp->s_magic); /* FS independent flags */ @@ -763,11 +732,6 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= MS_NOATIME; #endif - if (sb->s_magic != NILFS_SUPER_MAGIC) { - printk("NILFS: Can't find nilfs on dev %s.\n", sb->s_id); - return -EINVAL; - } - nilfs_set_default_options(sbi, sbp); sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); @@ -775,10 +739,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, sbi->s_interval = le32_to_cpu(sbp->s_c_interval); sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); - if (!parse_options(data, sb)) - return -EINVAL; - - return 0; + return !parse_options(data, sb) ? -EINVAL : 0 ; } /** @@ -967,12 +928,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * the RDONLY flag and then mark the partition as valid again. 
*/ down_write(&nilfs->ns_sem); - sbp = nilfs->ns_sbp; + sbp = nilfs->ns_sbp[0]; if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && (nilfs->ns_mount_state & NILFS_VALID_FS)) sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); sbp->s_mtime = cpu_to_le64(get_seconds()); - nilfs_commit_super(sbi); + nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } else { /* diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 661ab762d76..33400cf0bbe 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "nilfs.h" #include "segment.h" #include "alloc.h" @@ -105,7 +106,8 @@ void put_nilfs(struct the_nilfs *nilfs) } if (nilfs_init(nilfs)) { nilfs_destroy_gccache(nilfs); - brelse(nilfs->ns_sbh); + brelse(nilfs->ns_sbh[0]); + brelse(nilfs->ns_sbh[1]); } kfree(nilfs); } @@ -115,6 +117,7 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; + struct nilfs_super_block **sbp = nilfs->ns_sbp; unsigned dat_entry_size, segment_usage_size, checkpoint_size; unsigned inode_size; int err; @@ -124,9 +127,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, return err; down_read(&nilfs->ns_sem); - dat_entry_size = le16_to_cpu(nilfs->ns_sbp->s_dat_entry_size); - checkpoint_size = le16_to_cpu(nilfs->ns_sbp->s_checkpoint_size); - segment_usage_size = le16_to_cpu(nilfs->ns_sbp->s_segment_usage_size); + dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size); + checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size); + segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size); up_read(&nilfs->ns_sem); inode_size = nilfs->ns_inode_size; @@ -270,11 +273,8 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) nilfs_mdt_destroy(nilfs->ns_dat); goto failed; } - if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) { - down_write(&nilfs->ns_sem); - nilfs_update_last_segment(sbi, 0); - up_write(&nilfs->ns_sem); - } + if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) + sbi->s_super->s_dirt = 1; } set_nilfs_loaded(nilfs); @@ -296,9 +296,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) return res; } -static int -nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, - struct nilfs_super_block *sbp) +static int nilfs_store_disk_layout(struct the_nilfs *nilfs, + struct nilfs_super_block *sbp) { if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { printk(KERN_ERR "NILFS: revision mismatch " @@ -309,6 +308,10 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, NILFS_CURRENT_REV, NILFS_MINOR_REV); return -EINVAL; } + nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes); + if (nilfs->ns_sbsize > BLOCK_SIZE) + return -EINVAL; + nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); @@ -330,6 +333,122 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, return 0; } +static int nilfs_valid_sb(struct nilfs_super_block *sbp) +{ + static unsigned char sum[4]; + const int sumoff = offsetof(struct nilfs_super_block, s_sum); + size_t bytes; + u32 crc; + + if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) + return 0; + bytes = le16_to_cpu(sbp->s_bytes); + if (bytes > BLOCK_SIZE) + return 0; + crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, + sumoff); + crc = crc32_le(crc, sum, 4); + crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4, + bytes - sumoff - 4); + return crc == le32_to_cpu(sbp->s_sum); +} + 
+static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) +{ + return offset < ((le64_to_cpu(sbp->s_nsegments) * + le32_to_cpu(sbp->s_blocks_per_segment)) << + (le32_to_cpu(sbp->s_log_block_size) + 10)); +} + +static void nilfs_release_super_block(struct the_nilfs *nilfs) +{ + int i; + + for (i = 0; i < 2; i++) { + if (nilfs->ns_sbp[i]) { + brelse(nilfs->ns_sbh[i]); + nilfs->ns_sbh[i] = NULL; + nilfs->ns_sbp[i] = NULL; + } + } +} + +void nilfs_fall_back_super_block(struct the_nilfs *nilfs) +{ + brelse(nilfs->ns_sbh[0]); + nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; + nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; + nilfs->ns_sbh[1] = NULL; + nilfs->ns_sbp[1] = NULL; +} + +void nilfs_swap_super_block(struct the_nilfs *nilfs) +{ + struct buffer_head *tsbh = nilfs->ns_sbh[0]; + struct nilfs_super_block *tsbp = nilfs->ns_sbp[0]; + + nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; + nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; + nilfs->ns_sbh[1] = tsbh; + nilfs->ns_sbp[1] = tsbp; +} + +static int nilfs_load_super_block(struct the_nilfs *nilfs, + struct super_block *sb, int blocksize, + struct nilfs_super_block **sbpp) +{ + struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct buffer_head **sbh = nilfs->ns_sbh; + u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); + int valid[2], swp = 0; + + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, + &sbh[0]); + sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); + + if (!sbp[0]) { + if (!sbp[1]) { + printk(KERN_ERR "NILFS: unable to read superblock\n"); + return -EIO; + } + printk(KERN_WARNING + "NILFS warning: unable to read primary superblock\n"); + } else if (!sbp[1]) + printk(KERN_WARNING + "NILFS warning: unable to read secondary superblock\n"); + + valid[0] = nilfs_valid_sb(sbp[0]); + valid[1] = nilfs_valid_sb(sbp[1]); + swp = valid[1] && + (!valid[0] || + le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime)); + + if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) { + brelse(sbh[1]); + sbh[1] = NULL; + sbp[1] = NULL; + swp = 0; + } + if (!valid[swp]) { + nilfs_release_super_block(nilfs); + printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n", + sb->s_id); + return -EINVAL; + } + + if (swp) { + printk(KERN_WARNING "NILFS warning: broken superblock. " + "using spare superblock.\n"); + nilfs_swap_super_block(nilfs); + } + + nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); + nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; + nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + *sbpp = sbp[0]; + return 0; +} + /** * init_nilfs - initialize a NILFS instance. 
* @nilfs: the_nilfs structure @@ -352,16 +471,15 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) { struct super_block *sb = sbi->s_super; - struct buffer_head *sbh; struct nilfs_super_block *sbp; struct backing_dev_info *bdi; int blocksize; - int err = 0; + int err; down_write(&nilfs->ns_sem); if (nilfs_init(nilfs)) { /* Load values from existing the_nilfs */ - sbp = nilfs->ns_sbp; + sbp = nilfs->ns_sbp[0]; err = nilfs_store_magic_and_option(sb, sbp, data); if (err) goto out; @@ -377,36 +495,50 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) goto out; } - sbp = nilfs_load_super_block(sb, &sbh); - if (!sbp) { + blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + if (!blocksize) { + printk(KERN_ERR "NILFS: unable to set blocksize\n"); err = -EINVAL; goto out; } + err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); + if (err) + goto out; + err = nilfs_store_magic_and_option(sb, sbp, data); if (err) goto failed_sbh; blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (sb->s_blocksize != blocksize) { - sbp = nilfs_reload_super_block(sb, &sbh, blocksize); - if (!sbp) { + int hw_blocksize = bdev_hardsect_size(sb->s_bdev); + + if (blocksize < hw_blocksize) { + printk(KERN_ERR + "NILFS: blocksize %d too small for device " + "(sector-size = %d).\n", + blocksize, hw_blocksize); err = -EINVAL; + goto failed_sbh; + } + nilfs_release_super_block(nilfs); + sb_set_blocksize(sb, blocksize); + + err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); + if (err) goto out; /* not failed_sbh; sbh is released automatically when reloading fails. */ - } } nilfs->ns_blocksize_bits = sb->s_blocksize_bits; - err = nilfs_store_disk_layout(nilfs, sb, sbp); + err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - nilfs->ns_sbh = sbh; - nilfs->ns_sbp = sbp; bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; if (!bdi) @@ -443,7 +575,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) return err; failed_sbh: - brelse(sbh); + nilfs_release_super_block(nilfs); goto out; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index d750e48257c..30fe58778d0 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -49,8 +49,10 @@ enum { * @ns_sem: semaphore for shared states * @ns_writer_mutex: mutex protecting ns_writer attach/detach * @ns_writer_refcount: number of referrers on ns_writer - * @ns_sbh: buffer head of the on-disk super block - * @ns_sbp: pointer to the super block data + * @ns_sbh: buffer heads of on-disk super blocks + * @ns_sbp: pointers to super block data + * @ns_sbwtime: previous write time of super blocks + * @ns_sbsize: size of valid data in super block * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. @@ -101,8 +103,10 @@ struct the_nilfs { * - protecting s_dirt in the super_block struct * (see nilfs_write_super) and the following fields. 
*/ - struct buffer_head *ns_sbh; - struct nilfs_super_block *ns_sbp; + struct buffer_head *ns_sbh[2]; + struct nilfs_super_block *ns_sbp[2]; + time_t ns_sbwtime[2]; + unsigned ns_sbsize; unsigned ns_mount_state; struct list_head ns_supers; @@ -182,6 +186,10 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) +/* Minimum interval of periodical update of superblocks (in seconds) */ +#define NILFS_SB_FREQ 10 +#define NILFS_ALTSB_FREQ 60 /* spare superblock */ + void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct block_device *); void put_nilfs(struct the_nilfs *); @@ -190,6 +198,8 @@ int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); +void nilfs_fall_back_super_block(struct the_nilfs *); +void nilfs_swap_super_block(struct the_nilfs *); static inline void get_nilfs(struct the_nilfs *nilfs) diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index cbce6647f7f..1275b309953 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -252,6 +252,10 @@ struct nilfs_super_block { #define NILFS_MIN_NRSVSEGS 8 /* Minimum number of reserved segments */ +/* + * bytes offset of secondary super block + */ +#define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12) /* * Maximal count of links to a file -- cgit v1.2.3-70-g09d2 From 4f6b828837b4e3836f2c9ac2f0eab9773b6c1327 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 10 May 2009 22:41:43 +0900 Subject: nilfs2: fix lock order reversal in nilfs_clean_segments ioctl This is a companion patch to ("nilfs2: fix possible circular locking for get information ioctls"). This corrects lock order reversal between mm->mmap_sem and nilfs->ns_segctor_sem in nilfs_clean_segments() which was detected by lockdep check: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00003-g360bdc1 #7 ------------------------------------------------------- mmap/5294 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_from_user+0x2a/0x111 [] nilfs_ioctl_prepare_clean_segments+0x1d/0xf1 [nilfs2] [] nilfs_clean_segments+0x6d/0x1b9 [nilfs2] [] nilfs_ioctl+0x2ad/0x318 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff where nilfs_clean_segments() holds: nilfs->ns_segctor_sem -> copy_from_user() --> page fault -> mm->mmap_sem And, page fault path may hold: page fault -> mm->mmap_sem --> nilfs_page_mkwrite() -> nilfs->ns_segctor_sem Even though nilfs_clean_segments() does not perform write access on given user pages, it may cause deadlock because nilfs->ns_segctor_sem is shared per device and mm->mmap_sem can be shared with other tasks. To avoid this problem, this patch moves all calls of copy_from_user() outside the nilfs->ns_segctor_sem lock in the ioctl. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 163 ++++++++++++++++++++++++++++++---------------------- fs/nilfs2/nilfs.h | 3 +- fs/nilfs2/segment.c | 5 +- fs/nilfs2/segment.h | 3 +- 4 files changed, 100 insertions(+), 74 deletions(-) (limited to 'fs/nilfs2/segment.c') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e3c693d37d6..49489f68eab 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -25,6 +25,7 @@ #include /* lock_kernel(), unlock_kernel() */ #include /* capable() */ #include /* copy_from_user(), copy_to_user() */ +#include #include #include "nilfs.h" #include "segment.h" @@ -297,10 +298,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static ssize_t -nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -361,19 +362,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } -static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_move_blocks); -} - -static ssize_t -nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *cpfile = nilfs->ns_cpfile; struct nilfs_period *periods = buf; int ret, i; @@ -387,36 +379,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) +static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_delete_checkpoints); -} + size_t nmembs = argv->v_nmembs; + int ret; -static ssize_t 
-nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) -{ - int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_vblocknrs); -} - -static ssize_t -nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *dat = nilfs_dat_inode(nilfs); struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; @@ -455,18 +432,10 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_mark_blocks_dirty); -} - -static ssize_t -nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; @@ -481,31 +450,19 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_segments); -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, - void __user *argp) + struct nilfs_argv *argv, void **kbufs) { - struct nilfs_argv argv[5]; const char *msg; - int dir, ret; - - if (copy_from_user(argv, argp, sizeof(argv))) - return -EFAULT; + int ret; - dir = _IOC_WRITE; - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); + ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); if (ret < 0) { msg = "cannot read source blocks"; goto failed; } - ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); + + ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); if (ret < 0) { /* * can safely abort because checkpoints can be removed @@ -514,7 +471,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete checkpoints"; goto failed; } - ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); + ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]); if (ret < 0) { /* * can safely abort because DAT file is updated atomically @@ -523,7 +480,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete virtual blocks from DAT file"; goto failed; } - ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); + ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]); if (ret < 0) { /* * can safely abort because the operation is nondestructive. 
@@ -531,7 +488,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); + ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); if (ret < 0) { /* * can safely abort because this operation is atomic. @@ -551,9 +508,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { + struct nilfs_argv argv[5]; + const static size_t argsz[5] = { + sizeof(struct nilfs_vdesc), + sizeof(struct nilfs_period), + sizeof(__u64), + sizeof(struct nilfs_bdesc), + sizeof(__u64), + }; + void __user *base; + void *kbufs[5]; + struct the_nilfs *nilfs; + size_t len, nsegs; + int n, ret; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return nilfs_clean_segments(inode->i_sb, argp); + + if (copy_from_user(argv, argp, sizeof(argv))) + return -EFAULT; + + nsegs = argv[4].v_nmembs; + if (argv[4].v_size != argsz[4]) + return -EINVAL; + /* + * argv[4] points to segment numbers this ioctl cleans. We + * use kmalloc() for its buffer because memory used for the + * segment numbers is enough small. + */ + kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, + nsegs * sizeof(__u64)); + if (IS_ERR(kbufs[4])) + return PTR_ERR(kbufs[4]); + + nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + + for (n = 0; n < 4; n++) { + ret = -EINVAL; + if (argv[n].v_size != argsz[n]) + goto out_free; + + if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) + goto out_free; + + len = argv[n].v_size * argv[n].v_nmembs; + base = (void __user *)(unsigned long)argv[n].v_base; + if (len == 0) { + kbufs[n] = NULL; + continue; + } + + kbufs[n] = vmalloc(len); + if (!kbufs[n]) { + ret = -ENOMEM; + goto out_free; + } + if (copy_from_user(kbufs[n], base, len)) { + ret = -EFAULT; + vfree(kbufs[n]); + goto out_free; + } + } + + ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + + out_free: + while (--n > 0) + vfree(kbufs[n]); + kfree(kbufs[4]); + return ret; } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3d0c18a16db..da6fc0bba2e 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -236,7 +236,8 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); -int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, + void **); /* inode.c */ extern struct inode *nilfs_new_inode(struct inode *, int); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index fb70ec3be20..22c7f65c240 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) } } -int nilfs_clean_segments(struct super_block *sb, void __user *argp) +int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, + void **kbufs) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp) err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; - err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; diff --git 
a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a98fc1ed0bb..476bdd5df5b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); -extern int nilfs_clean_segments(struct super_block *, void __user *); +extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, + void **); extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); -- cgit v1.2.3-70-g09d2
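The minor-checkpoint flag patch above exists so that a userland tool which promotes new checkpoints to snapshots can skip the checkpoints NILFS creates for GC and metadata updates, instead of looping on them. Below is a minimal sketch of that filtering step; the cpinfo structure, the bit value and both helper functions are illustrative stand-ins rather than the real <linux/nilfs2_fs.h> definitions, where struct nilfs_cpinfo and the nilfs_cpinfo_minor() accessor generated by NILFS_CPINFO_FNS(MINOR, minor) would be used.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct nilfs_cpinfo: a checkpoint number plus
 * a flags word carrying the MINOR bit. */
struct cpinfo {
	uint64_t cno;
	uint32_t flags;
};

#define CP_MINOR	(1U << 0)	/* stand-in for the MINOR flag bit */

static int cpinfo_minor(const struct cpinfo *ci)
{
	return (ci->flags & CP_MINOR) != 0;
}

/* Stand-in for "turn this checkpoint into a snapshot". */
static void promote_to_snapshot(uint64_t cno)
{
	printf("promoting checkpoint %llu\n", (unsigned long long)cno);
}

int main(void)
{
	/* Pretend these entries came back from a checkpoint-listing call. */
	struct cpinfo ci[] = {
		{ .cno = 100, .flags = 0 },		/* user-visible change */
		{ .cno = 101, .flags = CP_MINOR },	/* created by GC */
		{ .cno = 102, .flags = 0 },
	};
	size_t i;

	for (i = 0; i < sizeof(ci) / sizeof(ci[0]); i++) {
		if (cpinfo_minor(&ci[i]))
			continue;	/* skip internally created checkpoints */
		promote_to_snapshot(ci[i].cno);
	}
	return 0;
}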
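The ``calculated'' active flag from the segment-state simplification above boils down to a two-value test: a segment is reported active exactly when it is the segment currently being written to or the one reserved next, and the active bit stored on disk is masked out when suinfo is handed to userland. The sketch below restates that logic outside the kernel; struct seg_state and the bit index are stand-ins for struct the_nilfs (ns_segnum/ns_nextnum) and NILFS_SEGMENT_USAGE_ACTIVE, chosen only for illustration.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the two fields of struct the_nilfs that
 * nilfs_segment_is_active() reads in the patch above. */
struct seg_state {
	uint64_t segnum;	/* index of the latest full segment */
	uint64_t nextnum;	/* index of the next full segment */
};

/* Same test as the new nilfs_segment_is_active() helper. */
static int segment_is_active(const struct seg_state *ns, uint64_t n)
{
	return n == ns->segnum || n == ns->nextnum;
}

/* Stand-in bit index; the real code uses NILFS_SEGMENT_USAGE_ACTIVE. */
#define SEGMENT_USAGE_ACTIVE	0

/* Mirrors the nilfs_sufile_get_suinfo() change: the on-disk active bit is
 * discarded and the calculated one is presented to userland instead. */
static uint32_t suinfo_flags(const struct seg_state *ns, uint64_t segnum,
			     uint32_t on_disk_flags)
{
	uint32_t flags = on_disk_flags & ~(1UL << SEGMENT_USAGE_ACTIVE);

	if (segment_is_active(ns, segnum))
		flags |= 1UL << SEGMENT_USAGE_ACTIVE;
	return flags;
}

int main(void)
{
	struct seg_state ns = { .segnum = 12, .nextnum = 13 };
	uint64_t n;

	for (n = 10; n < 15; n++)
		printf("segment %llu: flags 0x%x\n", (unsigned long long)n,
		       suinfo_flags(&ns, n, 0 /* pretend on-disk flags */));
	return 0;
}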
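The secondary super block introduced above is placed at the last 4 KiB boundary before the end of the partition and is rejected when that position would fall inside the area covered by full segments. The standalone sketch below re-derives those two checks; NILFS_SB2_OFFSET_BYTES() and the inequality are taken from the diff, while the helper name sb2_bad_offset() and the 1 GiB device geometry are assumptions made only for the example.

#include <stdint.h>
#include <stdio.h>

/* From include/linux/nilfs2_fs.h in the patch above: the last 4 KiB-aligned
 * offset before the end of the device. */
#define NILFS_SB2_OFFSET_BYTES(devsize)	((((devsize) >> 12) - 1) << 12)

/* Mirrors nilfs_sb2_bad_offset(): the candidate offset is unusable if it
 * falls inside the space occupied by full segments, i.e. below
 * nsegments * blocks_per_segment * (1024 << log_block_size) bytes. */
static int sb2_bad_offset(uint64_t nsegments, uint32_t blocks_per_segment,
			  uint32_t log_block_size, uint64_t offset)
{
	return offset < ((nsegments * blocks_per_segment)
			 << (log_block_size + 10));
}

int main(void)
{
	uint64_t devsize = 1ULL << 30;	/* hypothetical 1 GiB partition */
	uint64_t sb2off = NILFS_SB2_OFFSET_BYTES(devsize);

	/* hypothetical geometry: 127 segments of 2048 blocks each, 4 KiB
	 * block size (s_log_block_size == 2, i.e. 1024 << 2 bytes) */
	printf("secondary super block offset: %llu\n",
	       (unsigned long long)sb2off);
	printf("offset %s the segment area\n",
	       sb2_bad_offset(127, 2048, 2, sb2off) ?
	       "collides with" : "is clear of");
	return 0;
}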