diff options
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r-- | fs/gfs2/rgrp.c | 833 |
1 files changed, 697 insertions, 136 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9bd197..4d34887a601 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,6 +35,9 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +#define RSRV_CONTENTION_FACTOR 4 +#define RGRP_RSRV_MAX_CONTENDERS 2 + #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) } /** + * rs_cmp - multi-block reservation range compare + * @blk: absolute file system block number of the new reservation + * @len: number of blocks in the new reservation + * @rs: existing reservation to compare against + * + * returns: 1 if the block range is beyond the reach of the reservation + * -1 if the block range is before the start of the reservation + * 0 if the block range overlaps with the reservation + */ +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +{ + u64 startblk = gfs2_rs_startblk(rs); + + if (blk >= startblk + rs->rs_free) + return 1; + if (blk + len - 1 < startblk) + return -1; + return 0; +} + +/** + * rs_find - Find a rgrp multi-block reservation that contains a given block + * @rgd: The rgrp + * @rgblk: The block we're looking for, relative to the rgrp + */ +static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) +{ + struct rb_node **newn; + int rc; + u64 fsblk = rgblk + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(fsblk, 1, cur); + if (rc < 0) + newn = &((*newn)->rb_left); + else if (rc > 0) + newn = &((*newn)->rb_right); + else { + spin_unlock(&rgd->rd_rsspin); + return cur; + } + } + spin_unlock(&rgd->rd_rsspin); + return NULL; +} + +/** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. * @buf: the buffer that holds the bitmaps @@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + struct gfs2_blkreserv *res; + + if (ip->i_res) + return 0; + + res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!res) + error = -ENOMEM; + + down_write(&ip->i_rw_mutex); + if (ip->i_res) + kmem_cache_free(gfs2_rsrv_cachep, res); + else + ip->i_res = res; + up_write(&ip->i_rw_mutex); + return error; +} + +static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +{ + gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", + rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, + rs->rs_free); +} + +/** + * __rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +static void __rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + /* We can't do this: The reason is that when the rgrp is invalidated, + it's in the "middle" of acquiring the glock, but the HOLDER bit + isn't set yet: + BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ + trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); + + if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) + rb_erase(&rs->rs_node, &rgd->rd_rstree); + BUG_ON(!rgd->rd_rs_cnt); + rgd->rd_rs_cnt--; + + if (rs->rs_free) { + /* return reserved blocks to the rgrp and the ip */ + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; + rs->rs_free = 0; + clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + smp_mb__after_clear_bit(); + } + /* We can't change any of the step 1 or step 2 components of the rs. + E.g. We can't set rs_rgd to NULL because the rgd glock is held and + dequeued through this pointer. + Can't: atomic_set(&rs->rs_sizehint, 0); + Can't: rs->rs_requested = 0; + Can't: rs->rs_rgd = NULL;*/ + rs->rs_bi = NULL; + rs->rs_biblk = 0; +} + +/** + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + __rs_deltree(rs); + spin_unlock(&rgd->rd_rsspin); +} + +/** + * gfs2_rs_delete - delete a multi-block reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + gfs2_rs_deltree(ip->i_res); + trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + BUG_ON(ip->i_res->rs_free); + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + +/** + * return_all_reservations - return all reserved blocks back to the rgrp. + * @rgd: the rgrp that needs its space back + * + * We previously reserved a bunch of blocks for allocation. Now we need to + * give them back. This leave the reservation structures in tact, but removes + * all of their corresponding "no-fly zones". + */ +static void return_all_reservations(struct gfs2_rgrpd *rgd) +{ + struct rb_node *n; + struct gfs2_blkreserv *rs; + + spin_lock(&rgd->rd_rsspin); + while ((n = rb_first(&rgd->rd_rstree))) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + __rs_deltree(rs); + } + spin_unlock(&rgd->rd_rsspin); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } @@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data0 = be64_to_cpu(buf.ri_data0); rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); + spin_lock_init(&rgd->rd_rsspin); error = compute_bitstructs(rgd); if (error) @@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; @@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } +static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + + if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || + rgl->rl_dinodes != str->rg_dinodes || + rgl->rl_igeneration != str->rg_igeneration) + return 0; + return 1; +} + +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + +static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; + rgl->rl_unlinked = cpu_to_be32(unlinked); +} + +static u32 count_unlinked(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + const u32 length = rgd->rd_length; + const u8 *buffer = NULL; + u32 i, goal, count = 0; + + for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { + goal = 0; + buffer = bi->bi_bh->b_data + bi->bi_offset; + WARN_ON(!buffer_uptodate(bi->bi_bh)); + while (goal < bi->bi_len * GFS2_NBBY) { + goal = gfs2_bitfit(buffer, bi->bi_len, goal, + GFS2_BLKST_UNLINKED); + if (goal == BFITNOENT) + break; + count++; + goal++; + } + } + + return count; +} + + /** - * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps - * @gh: The glock holder for the resource group + * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps + * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. @@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) * Returns: errno */ -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; @@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) unsigned int x, y; int error; + if (rgd->rd_bits[0].bi_bh != NULL) + return 0; + for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); @@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, + rgd->rd_bits[0].bi_bh->b_data); + } + else if (sdp->sd_args.ar_rgrplvb) { + if (!gfs2_rgrp_lvb_valid(rgd)){ + gfs2_consist_rgrpd(rgd); + error = -EIO; + goto fail; + } + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + } return 0; fail: @@ -796,6 +1055,39 @@ fail: return error; } +int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +{ + u32 rl_flags; + + if (rgd->rd_flags & GFS2_RDF_UPTODATE) + return 0; + + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + return gfs2_rgrp_bh_get(rgd); + + rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); + rl_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgd->rd_free_clone = rgd->rd_free; + rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); + rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); + return 0; +} + +int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; + struct gfs2_sbd *sdp = rgd->rd_sbd; + + if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) + return 0; + return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); +} + /** * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @gh: The glock holder for the resource group @@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) for (x = 0; x < length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - brelse(bi->bi_bh); - bi->bi_bh = NULL; + if (bi->bi_bh) { + brelse(bi->bi_bh); + bi->bi_bh = NULL; + } } } @@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, bh, 1); gfs2_rgrp_out(rgd, bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -974,38 +1269,184 @@ out: } /** - * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode - * @ip: the incore GFS2 inode structure + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree + * @bi: the bitmap with the blocks + * @ip: the inode structure + * @biblk: the 32-bit block number relative to the start of the bitmap + * @amount: the number of blocks to reserve * - * Returns: the struct gfs2_qadata + * Returns: NULL - reservation was already taken, so not inserted + * pointer to the inserted reservation */ +static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, + struct gfs2_inode *ip, u32 biblk, + int amount) +{ + struct rb_node **newn, *parent = NULL; + int rc; + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; -struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + BUG_ON(!ip->i_res); + BUG_ON(gfs2_rs_active(rs)); + /* Figure out where to put new node */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + + parent = *newn; + rc = rs_cmp(fsblock, amount, cur); + if (rc > 0) + newn = &((*newn)->rb_right); + else if (rc < 0) + newn = &((*newn)->rb_left); + else { + spin_unlock(&rgd->rd_rsspin); + return NULL; /* reservation already in use */ + } + } + + /* Do our reservation work */ + rs = ip->i_res; + rs->rs_free = amount; + rs->rs_biblk = biblk; + rs->rs_bi = bi; + rb_link_node(&rs->rs_node, parent, newn); + rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + + /* Do our inode accounting for the reservation */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + + /* Do our rgrp accounting for the reservation */ + rgd->rd_reserved += amount; /* blocks reserved */ + rgd->rd_rs_cnt++; /* number of in-tree reservations */ + spin_unlock(&rgd->rd_rsspin); + trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + return rs; +} + +/** + * unclaimed_blocks - return number of blocks that aren't spoken for + */ +static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int error; - BUG_ON(ip->i_qadata != NULL); - ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - return ip->i_qadata; + return rgd->rd_free_clone - rgd->rd_reserved; } /** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure + * rg_mblk_search - find a group of multiple free blocks + * @rgd: the resource group descriptor + * @rs: the block reservation + * @ip: pointer to the inode for which we're reserving blocks * - * Returns: the struct gfs2_qadata + * This is very similar to rgblk_search, except we're looking for whole + * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing + * on aligned dwords for speed's sake. + * + * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int gfs2_blkrsv_get(struct gfs2_inode *ip) +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; + struct gfs2_bitmap *bi = rgd->rd_bits; + const u32 length = rgd->rd_length; + u32 blk; + unsigned int buf, x, search_bytes; + u8 *buffer = NULL; + u8 *ptr, *end, *nonzero; + u32 goal, rsv_bytes; + struct gfs2_blkreserv *rs; + u32 best_rs_bytes, unclaimed; + int best_rs_blocks; + + /* Find bitmap block that contains bits for goal block */ + if (rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + /* Convert scope of "goal" from rgrp-wide to within + found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } + } + buf = 0; + goal = 0; + +do_search: + best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), + (RGRP_RSRV_MINBLKS * rgd->rd_length)); + best_rs_bytes = (best_rs_blocks * + (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / + GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ + best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); + unclaimed = unclaimed_blocks(rgd); + if (best_rs_bytes * GFS2_NBBY > unclaimed) + best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; + + for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags)) + goto skip; + + WARN_ON(!buffer_uptodate(bi->bi_bh)); + if (bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; + else + buffer = bi->bi_bh->b_data + bi->bi_offset; + + /* We have to keep the reservations aligned on u64 boundaries + otherwise we could get situations where a byte can't be + used because it's after a reservation, but a free bit still + is within the reservation's area. */ + ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); + end = (buffer + bi->bi_len); + while (ptr < end) { + rsv_bytes = 0; + if ((ptr + best_rs_bytes) <= end) + search_bytes = best_rs_bytes; + else + search_bytes = end - ptr; + BUG_ON(!search_bytes); + nonzero = memchr_inv(ptr, 0, search_bytes); + /* If the lot is all zeroes, reserve the whole size. If + there's enough zeroes to satisfy the request, use + what we can. If there's not enough, keep looking. */ + if (nonzero == NULL) + rsv_bytes = search_bytes; + else if ((nonzero - ptr) * GFS2_NBBY >= + ip->i_res->rs_requested) + rsv_bytes = (nonzero - ptr); + + if (rsv_bytes) { + blk = ((ptr - buffer) * GFS2_NBBY); + BUG_ON(blk >= bi->bi_len * GFS2_NBBY); + rs = rs_insert(bi, ip, blk, + rsv_bytes * GFS2_NBBY); + if (IS_ERR(rs)) + return PTR_ERR(rs); + if (rs) + return 0; + } + ptr += ALIGN(search_bytes, sizeof(u64)); + } +skip: + /* Try next bitmap block (wrap back to rgrp header + if at end) */ + buf++; + buf %= length; + goal = 0; + } + + return BFITNOENT; } /** @@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip) * @ip: the inode * * If there's room for the requested blocks to be allocated from the RG: + * This will try to get a multi-block reservation first, and if that doesn't + * fit, it will take what it can. * * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - const struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = ip->i_res; if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; - if (rgd->rd_free_clone >= rs->rs_requested) + /* Look for a multi-block reservation. */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && + rg_mblk_search(rgd, ip) != BFITNOENT) + return 1; + if (unclaimed_blocks(rgd) >= rs->rs_requested) return 1; - return 0; -} -static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; + return 0; } /** @@ -1101,119 +1544,120 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip } /** - * get_local_rgrp - Choose and lock a rgrp for allocation + * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @last_unlinked: the last unlinked block - * - * Try to acquire rgrp in way which avoids contending with others. + * @requested: the number of blocks to be reserved * * Returns: errno */ -static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd, *begin = NULL; + struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; - int error, rg_locked, flags = LM_FLAG_TRY; + int error = 0, rg_locked, flags = LM_FLAG_TRY; + u64 last_unlinked = NO_BLOCK; int loops = 0; - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) - rgd = begin = ip->i_rgd; - else - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); - - if (rgd == NULL) + if (sdp->sd_args.ar_rgrplvb) + flags |= GL_SKIP; + rs->rs_requested = requested; + if (gfs2_assert_warn(sdp, requested)) { + error = -EINVAL; + goto out; + } + if (gfs2_rs_active(rs)) { + begin = rs->rs_rgd; + flags = 0; /* Yoda: Do or do not. There is no try */ + } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { + rs->rs_rgd = begin = ip->i_rgd; + } else { + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + } + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { rg_locked = 1; error = 0; + } else if (!loops && !gfs2_rs_active(rs) && + rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + /* If the rgrp already is maxed out for contenders, + we can eliminate it as a "first pass" without even + requesting the rgrp glock. */ + error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - flags, &rs->rs_rgd_gh); + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &rs->rs_rgd_gh); + if (!error && sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rs->rs_rgd); + if (error) { + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + return error; + } + } } switch (error) { case 0: - if (try_rgrp_fit(rgd, ip)) { - ip->i_rgd = rgd; + if (gfs2_rs_active(rs)) { + if (unclaimed_blocks(rs->rs_rgd) + + rs->rs_free >= rs->rs_requested) { + ip->i_rgd = rs->rs_rgd; + return 0; + } + /* We have a multi-block reservation, but the + rgrp doesn't have enough free blocks to + satisfy the request. Free the reservation + and look for a suitable rgrp. */ + gfs2_rs_deltree(rs); + } + if (try_rgrp_fit(rs->rs_rgd, ip)) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rgd); + ip->i_rgd = rs->rs_rgd; return 0; } - if (rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rgd); + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, + ip->i_no_addr); + } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rgd = gfs2_rgrpd_get_next(rgd); - if (rgd == begin) { - flags = 0; - loops++; - } + rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); + rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rgd != begin) /* If we didn't wrap */ + break; + + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && + !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + goto out; + } else if (loops == 2) + /* Flushing the log may release space */ + gfs2_log_flush(sdp, NULL); break; default: - return error; + goto out; } } - - return -ENOSPC; -} - -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - -/** - * gfs2_inplace_reserve - Reserve space in the filesystem - * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved - * - * Returns: errno - */ - -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_blkreserv *rs; - int error; - u64 last_unlinked = NO_BLOCK; - int tries = 0; - - error = gfs2_blkrsv_get(ip); - if (error) - return error; - - rs = ip->i_res; - rs->rs_requested = requested; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } - - do { - error = get_local_rgrp(ip, &last_unlinked); - if (error != -ENOSPC) - break; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - break; - continue; - } - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - } while (tries++ < 3); + error = -ENOSPC; out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; + if (!rs) + return; + + if (!rs->rs_free) + gfs2_rs_deltree(rs); + if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1326,7 +1776,27 @@ do_search: if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + while (1) { + struct gfs2_blkreserv *rs; + u32 rgblk; + + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + if (biblk == BFITNOENT) + break; + /* Check if this block is reserved() */ + rgblk = gfs2_bi2rgd_blk(bi, biblk); + rs = rs_find(rgd, rgblk); + if (rs == NULL) + break; + + BUG_ON(rs->rs_bi != bi); + biblk = BFITNOENT; + /* This should jump to the first block after the + reservation. */ + goal = rs->rs_biblk + rs->rs_free; + if (goal >= bi->bi_len * GFS2_NBBY) + break; + } if (biblk != BFITNOENT) break; @@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, u32 blk, bool dinode, unsigned int *n) { const unsigned int elen = *n; - u32 goal; + u32 goal, rgblk; const u8 *buffer = NULL; + struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; + rgblk = gfs2_bi2rgd_blk(bi, goal); + rs = rs_find(rgd, rgblk); + if (rs) /* Oops, we bumped into someone's reservation */ + break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { - const struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_blkreserv *trs; + const struct rb_node *n; + if (rgd == NULL) return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, + rgd->rd_reserved); + spin_lock(&rgd->rd_rsspin); + for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { + trs = rb_entry(n, struct gfs2_blkreserv, rs_node); + dump_rs(seq, trs); + } + spin_unlock(&rgd->rd_rsspin); return 0; } @@ -1471,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** + * claim_reserved_blks - Claim previously reserved blocks + * @ip: the inode that's claiming the reservation + * @dinode: 1 if this block is a dinode block, otherwise data block + * @nblocks: desired extent length + * + * Lay claim to previously allocated block reservation blocks. + * Returns: Starting block number of the blocks claimed. + * Sets *nblocks to the actual extent length allocated. + */ +static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, + unsigned int *nblocks) +{ + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_bitmap *bi; + u64 start_block = gfs2_rs_startblk(rs); + const unsigned int elen = *nblocks; + + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + gfs2_assert_withdraw(sdp, rgd); + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + bi = rs->rs_bi; + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + + for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { + /* Make sure the bitmap hasn't changed */ + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + rs->rs_biblk++; + rs->rs_free--; + + BUG_ON(!rgd->rd_reserved); + rgd->rd_reserved--; + dinode = false; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + } + + if (!rs->rs_free) { + struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; + + gfs2_rs_deltree(rs); + /* -nblocks because we haven't returned to do the math yet. + I'm doing the math backwards to prevent negative numbers, + but think of it as: + if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) + rg_mblk_search(rgd, ip); + } + return start_block; +} + +/** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number - * @ndata: requested number of blocks/extent length (value/result) + * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 * @generation: the generation number of the inode * @@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + /* Check if we have a multi-block reservation, and if so, claim the + next free block from it. */ + if (gfs2_rs_active(ip->i_res)) { + BUG_ON(!ip->i_res->rs_free); + rgd = ip->i_res->rs_rgd; + block = claim_reserved_blks(ip, dinode, nblocks); + } else { + rgd = ip->i_rgd; - /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) - goto rgrp_error; + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + + /* Since all blocks are reserved in advance, this shouldn't + happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", + *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; + } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + } ndata = *nblocks; if (dinode) ndata--; @@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rgd->rd_free < *nblocks) + if (rgd->rd_free < *nblocks) { + printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; + } rgd->rd_free -= *nblocks; if (dinode) { @@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, 1); } static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) @@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, -1); gfs2_statfs_change(sdp, 0, +1, -1); } @@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_uninit(&rlist->rl_ghs[x]); kfree(rlist->rl_ghs); + rlist->rl_ghs = NULL; } } |