diff options
author | David Woodhouse <dwmw2@shinybook.infradead.org> | 2005-05-03 08:14:09 +0100 |
---|---|---|
committer | David Woodhouse <dwmw2@shinybook.infradead.org> | 2005-05-03 08:14:09 +0100 |
commit | 27b030d58c8e72fc7a95187a791bd9406e350f02 (patch) | |
tree | ab3bab7f39a5ce5bab65578a7e08fa4dfdeb198c /fs | |
parent | 79d20b14a0d651f15b0ef9a22b6cf12d284a6d38 (diff) | |
parent | 6628465e33ca694bd8fd5c3cf4eb7ff9177bc694 (diff) |
Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'fs')
50 files changed, 1335 insertions, 897 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c index 6b6bb7c8abf..23c12512802 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -131,8 +131,7 @@ static int afs_file_readpage(struct file *file, struct page *page) vnode = AFS_FS_I(inode); - if (!PageLocked(page)) - PAGE_BUG(page); + BUG_ON(!PageLocked(page)); ret = -ESTALE; if (vnode->flags & AFS_VNODE_DELETED) @@ -40,9 +40,6 @@ #define dprintk(x...) do { ; } while (0) #endif -static long aio_run = 0; /* for testing only */ -static long aio_wakeups = 0; /* for testing only */ - /*------ sysctl variables----*/ atomic_t aio_nr = ATOMIC_INIT(0); /* current system wide number of aio requests */ unsigned aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ @@ -405,7 +402,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) req->ki_ctx = ctx; req->ki_cancel = NULL; req->ki_retry = NULL; - req->ki_obj.user = NULL; req->ki_dtor = NULL; req->private = NULL; INIT_LIST_HEAD(&req->ki_run_list); @@ -451,11 +447,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { if (req->ki_dtor) req->ki_dtor(req); - req->ki_ctx = NULL; - req->ki_filp = NULL; - req->ki_obj.user = NULL; - req->ki_dtor = NULL; - req->private = NULL; kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; @@ -623,7 +614,6 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb) if (list_empty(&iocb->ki_run_list)) { list_add_tail(&iocb->ki_run_list, &ctx->run_list); - iocb->ki_queued++; return 1; } return 0; @@ -664,10 +654,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) } if (!(iocb->ki_retried & 0xff)) { - pr_debug("%ld retry: %d of %d (kick %ld, Q %ld run %ld, wake %ld)\n", - iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes, - iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups); + pr_debug("%ld retry: %d of %d\n", iocb->ki_retried, + iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); } if (!(retry = iocb->ki_retry)) { @@ -774,7 +762,6 @@ out: static int __aio_run_iocbs(struct kioctx *ctx) { struct kiocb *iocb; - int count = 0; LIST_HEAD(run_list); list_splice_init(&ctx->run_list, &run_list); @@ -789,9 +776,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) aio_run_iocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); - count++; } - aio_run++; if (!list_empty(&ctx->run_list)) return 1; return 0; @@ -890,10 +875,8 @@ static void queue_kicked_iocb(struct kiocb *iocb) spin_lock_irqsave(&ctx->ctx_lock, flags); run = __queue_kicked_iocb(iocb); spin_unlock_irqrestore(&ctx->ctx_lock, flags); - if (run) { + if (run) aio_queue_work(ctx); - aio_wakeups++; - } } /* @@ -913,7 +896,6 @@ void fastcall kick_iocb(struct kiocb *iocb) return; } - iocb->ki_kicked++; /* If its already kicked we shouldn't queue it again */ if (!kiocbTryKick(iocb)) { queue_kicked_iocb(iocb); @@ -984,7 +966,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) tail = info->tail; event = aio_ring_event(info, tail, KM_IRQ0); - tail = (tail + 1) % info->nr; + if (++tail >= info->nr) + tail = 0; event->obj = (u64)(unsigned long)iocb->ki_obj.user; event->data = iocb->ki_user_data; @@ -1008,10 +991,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) pr_debug("added to ring %p at [%lu]\n", iocb, tail); - pr_debug("%ld retries: %d of %d (kicked %ld, Q %ld run %ld wake %ld)\n", - iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes, - iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups); + pr_debug("%ld retries: %d of %d\n", iocb->ki_retried, + iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); put_rq: /* everything turned out well, dispose of the aiocb. */ ret = __aio_put_req(ctx, iocb); @@ -1119,7 +1100,6 @@ static int read_events(struct kioctx *ctx, int i = 0; struct io_event ent; struct aio_timeout to; - int event_loop = 0; /* testing only */ int retry = 0; /* needed to zero any padding within an entry (there shouldn't be @@ -1186,7 +1166,6 @@ retry: if (to.timed_out) /* Only check after read evt */ break; schedule(); - event_loop++; if (signal_pending(tsk)) { ret = -EINTR; break; @@ -1214,9 +1193,6 @@ retry: if (timeout) clear_timeout(&to); out: - pr_debug("event loop executed %d times\n", event_loop); - pr_debug("aio_run %ld\n", aio_run); - pr_debug("aio_wakeups %ld\n", aio_wakeups); return i ? i : ret; } @@ -1515,8 +1491,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->ki_filp = file; - iocb->aio_key = req->ki_key; - ret = put_user(iocb->aio_key, &user_iocb->aio_key); + ret = put_user(req->ki_key, &user_iocb->aio_key); if (unlikely(ret)) { dprintk("EFAULT: aio_key\n"); goto out_put_req; @@ -1531,13 +1506,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_opcode = iocb->aio_lio_opcode; init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); INIT_LIST_HEAD(&req->ki_wait.task_list); - req->ki_run_list.next = req->ki_run_list.prev = NULL; - req->ki_retry = NULL; req->ki_retried = 0; - req->ki_kicked = 0; - req->ki_queued = 0; - aio_run = 0; - aio_wakeups = 0; ret = aio_setup_iocb(req); @@ -1545,10 +1514,14 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; spin_lock_irq(&ctx->ctx_lock); - list_add_tail(&req->ki_run_list, &ctx->run_list); - /* drain the run list */ - while (__aio_run_iocbs(ctx)) - ; + if (likely(list_empty(&ctx->run_list))) { + aio_run_iocb(req); + } else { + list_add_tail(&req->ki_run_list, &ctx->run_list); + /* drain the run list */ + while (__aio_run_iocbs(ctx)) + ; + } spin_unlock_irq(&ctx->ctx_lock); aio_put_req(req); /* drop extra ref to req */ return 0; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f5a52c87172..c7b2b889018 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -84,6 +84,7 @@ struct autofs_wait_queue { char *name; /* This is for status reporting upon return */ int status; + atomic_t notified; atomic_t wait_ctr; }; @@ -101,6 +102,7 @@ struct autofs_sb_info { int needs_reghost; struct super_block *sb; struct semaphore wq_sem; + spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ }; @@ -126,9 +128,18 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { static inline int autofs4_ispending(struct dentry *dentry) { struct autofs_info *inf = autofs4_dentry_ino(dentry); + int pending = 0; - return (dentry->d_flags & DCACHE_AUTOFS_PENDING) || - (inf != NULL && inf->flags & AUTOFS_INF_EXPIRING); + if (dentry->d_flags & DCACHE_AUTOFS_PENDING) + return 1; + + if (inf) { + spin_lock(&inf->sbi->fs_lock); + pending = inf->flags & AUTOFS_INF_EXPIRING; + spin_unlock(&inf->sbi->fs_lock); + } + + return pending; } static inline void autofs4_copy_atime(struct file *src, struct file *dst) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 31540a6404d..500425e24fb 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -99,6 +99,10 @@ static int autofs4_check_tree(struct vfsmount *mnt, if (!autofs4_can_expire(top, timeout, do_now)) return 0; + /* Is someone visiting anywhere in the tree ? */ + if (may_umount_tree(mnt)) + return 0; + spin_lock(&dcache_lock); repeat: next = this_parent->d_subdirs.next; @@ -270,10 +274,18 @@ static struct dentry *autofs4_expire(struct super_block *sb, /* Case 2: tree mount, expire iff entire tree is not busy */ if (!exp_leaves) { + /* Lock the tree as we must expire as a whole */ + spin_lock(&sbi->fs_lock); if (autofs4_check_tree(mnt, dentry, timeout, do_now)) { - expired = dentry; - break; + struct autofs_info *inf = autofs4_dentry_ino(dentry); + + /* Set this flag early to catch sys_chdir and the like */ + inf->flags |= AUTOFS_INF_EXPIRING; + spin_unlock(&sbi->fs_lock); + expired = dentry; + break; } + spin_unlock(&sbi->fs_lock); /* Case 3: direct mount, expire individual leaves */ } else { expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index a5256074662..4bb14cc6804 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -206,6 +206,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->version = 0; sbi->sub_version = 0; init_MUTEX(&sbi->wq_sem); + spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; s->s_blocksize = 1024; s->s_blocksize_bits = 10; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 1ab24a662e0..5a40d36e5a5 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -210,17 +210,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->len = len; wq->status = -EINTR; /* Status return if interrupted */ atomic_set(&wq->wait_ctr, 2); + atomic_set(&wq->notified, 1); up(&sbi->wq_sem); - - DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); - /* autofs4_notify_daemon() may block */ - if (notify != NFY_NONE) { - autofs4_notify_daemon(sbi,wq, - notify == NFY_MOUNT ? - autofs_ptype_missing : - autofs_ptype_expire_multi); - } } else { atomic_inc(&wq->wait_ctr); up(&sbi->wq_sem); @@ -229,6 +220,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); } + if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) { + int type = (notify == NFY_MOUNT ? + autofs_ptype_missing : autofs_ptype_expire_multi); + + DPRINTK(("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify)); + + /* autofs4_notify_daemon() may block */ + autofs4_notify_daemon(sbi, wq, type); + } + /* wq->name is NULL if and only if the lock is already released */ if ( sbi->catatonic ) { @@ -140,6 +140,7 @@ inline void bio_init(struct bio *bio) * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator * @nr_iovecs: number of iovecs to pre-allocate + * @bs: the bio_set to allocate from * * Description: * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. @@ -629,6 +630,7 @@ out: /** * bio_map_user - map user address into bio + * @q: the request_queue_t for the bio * @bdev: destination block device * @uaddr: start of user address * @len: length in bytes diff --git a/fs/buffer.c b/fs/buffer.c index 3b12cf947ab..5f525b3c6d9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -218,7 +218,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb = get_super(bdev); if (sb && !(sb->s_flags & MS_RDONLY)) { sb->s_frozen = SB_FREEZE_WRITE; - wmb(); + smp_wmb(); sync_inodes_sb(sb, 0); DQUOT_SYNC(sb); @@ -235,7 +235,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sync_inodes_sb(sb, 1); sb->s_frozen = SB_FREEZE_TRANS; - wmb(); + smp_wmb(); sync_blockdev(sb->s_bdev); @@ -263,7 +263,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) if (sb->s_op->unlockfs) sb->s_op->unlockfs(sb); sb->s_frozen = SB_UNFROZEN; - wmb(); + smp_wmb(); wake_up(&sb->s_wait_unfrozen); drop_super(sb); } @@ -774,15 +774,14 @@ repeat: /** * sync_mapping_buffers - write out and wait upon a mapping's "associated" * buffers - * @buffer_mapping - the mapping which backs the buffers' data - * @mapping - the mapping which wants those buffers written + * @mapping: the mapping which wants those buffers written * * Starts I/O against the buffers at mapping->private_list, and waits upon * that I/O. * - * Basically, this is a convenience function for fsync(). @buffer_mapping is - * the blockdev which "owns" the buffers and @mapping is a file or directory - * which needs those buffers to be written for a successful fsync(). + * Basically, this is a convenience function for fsync(). + * @mapping is a file or directory which needs those buffers to be written for + * a successful fsync(). */ int sync_mapping_buffers(struct address_space *mapping) { @@ -1263,6 +1262,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) /** * mark_buffer_dirty - mark a buffer_head as needing writeout + * @bh: the buffer_head to mark dirty * * mark_buffer_dirty() will set the dirty bit against the buffer, then set its * backing page dirty, then tag the page as dirty in its address_space's radix @@ -1501,6 +1501,7 @@ EXPORT_SYMBOL(__breadahead); /** * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read * @@ -2078,8 +2079,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) int nr, i; int fully_mapped = 1; - if (!PageLocked(page)) - PAGE_BUG(page); + BUG_ON(!PageLocked(page)); blocksize = 1 << inode->i_blkbits; if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -2917,7 +2917,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) bh = head; do { - if (buffer_write_io_error(bh)) + if (buffer_write_io_error(bh) && page->mapping) set_bit(AS_EIO, &page->mapping->flags); if (buffer_busy(bh)) goto failed; diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 4d2404305ab..95483baab70 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,9 @@ Fix error mapping of the TOO_MANY_LINKS (hardlinks) case. Do not oops if root user kills cifs oplock kernel thread or kills the cifsd thread (NB: killing the cifs kernel threads is not recommended, unmount and rmmod cifs will kill them when they are -no longer needed). +no longer needed). Fix readdir to ASCII servers (ie older servers +which do not support Unicode) and also require asterik. + Version 1.33 ------------ diff --git a/fs/cifs/TODO b/fs/cifs/TODO index 1e8490ed694..8cc881694e2 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO @@ -1,4 +1,4 @@ -version 1.32 April 3, 2005 +version 1.34 April 29, 2005 A Partial List of Missing Features ================================== @@ -70,7 +70,15 @@ r) Implement O_DIRECT flag on open (already supported on mount) s) Allow remapping of last remaining character (\) to +0xF000 which (this character is valid for POSIX but not for Windows) -KNOWN BUGS (updated April 3, 2005) +t) Create UID mapping facility so server UIDs can be mapped on a per +mount or a per server basis to client UIDs or nobody if no mapping +exists. This is helpful when Unix extensions are negotiated to +allow better permission checking when UIDs differ on the server +and client. Add new protocol request to the CIFS protocol +standard for asking the server for the corresponding name of a +particular uid. + +KNOWN BUGS (updated April 29, 2005) ==================================== See http://bugzilla.samba.org - search on product "CifsVFS" for current bug list. diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b004fef0a42..741ff0c69f3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2451,12 +2451,14 @@ findFirstRetry: name_len += 2; } else { /* BB add check for overrun of SMB buf BB */ name_len = strnlen(searchName, PATH_MAX); - name_len++; /* trailing null */ /* BB fix here and in unicode clause above ie if(name_len > buffersize-header) free buffer exit; BB */ strncpy(pSMB->FileName, searchName, name_len); - pSMB->FileName[name_len] = 0; /* just in case */ + pSMB->FileName[name_len] = '\\'; + pSMB->FileName[name_len+1] = '*'; + pSMB->FileName[name_len+2] = 0; + name_len += 3; } params = 12 + name_len /* includes null */ ; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 040eb288bb1..ea5888688f9 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) * @goal: place to store the result. * * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. If the branch had been changed - * under us we return -EAGAIN. + * stores it in *@goal and returns zero. */ -static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], - Indirect *partial, unsigned long *goal) +static unsigned long ext3_find_goal(struct inode *inode, long block, + Indirect chain[4], Indirect *partial) { struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info; @@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], */ if (block_i && (block == block_i->last_alloc_logical_block + 1) && (block_i->last_alloc_physical_block != 0)) { - *goal = block_i->last_alloc_physical_block + 1; - return 0; + return block_i->last_alloc_physical_block + 1; } - if (verify_chain(chain, partial)) { - *goal = ext3_find_near(inode, partial); - return 0; - } - return -EAGAIN; + return ext3_find_near(inode, partial); } /** @@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, * @where: location of missing link * @num: number of blocks we are adding * - * This function verifies that chain (up to the missing link) had not - * changed, fills the missing link and does all housekeeping needed in + * This function fills the missing link and does all housekeeping needed in * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. Otherwise (== chain had been changed) - * we free the new blocks (forgetting their buffer_heads, indeed) and - * return -EAGAIN. + * chain to new block and return 0. */ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, @@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, if (err) goto err_out; } - /* Verify that place we are splicing to is still there and vacant */ - - if (!verify_chain(chain, where-1) || *where->p) - /* Writer: end */ - goto changed; - /* That's it */ *where->p = where->key; @@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, } return err; -changed: - /* - * AKPM: if where[i].bh isn't part of the current updating - * transaction then we explode nastily. Test this code path. - */ - jbd_debug(1, "the chain changed: try again\n"); - err = -EAGAIN; - err_out: for (i = 1; i < num; i++) { BUFFER_TRACE(where[i].bh, "call journal_forget"); ext3_journal_forget(handle, where[i].bh); } - /* For the normal collision cleanup case, we free up the blocks. - * On genuine filesystem errors we don't even think about doing - * that. */ - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, - le32_to_cpu(where[i].key), 1); return err; } @@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long goal; int left; int boundary = 0; - int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); + const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); struct ext3_inode_info *ei = EXT3_I(inode); J_ASSERT(handle != NULL || create == 0); @@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, if (depth == 0) goto out; -reread: partial = ext3_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ if (!partial) { clear_buffer_new(bh_result); -got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (boundary) - set_buffer_boundary(bh_result); - /* Clean up and exit */ - partial = chain+depth-1; /* the whole chain */ - goto cleanup; + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) { -cleanup: + if (!create || err == -EIO) + goto cleanup; + + down(&ei->truncate_sem); + + /* + * If the indirect block is missing while we are reading + * the chain(ext3_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); brelse(partial->bh); partial--; } - BUFFER_TRACE(bh_result, "returned"); -out: - return err; + partial = ext3_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + up(&ei->truncate_sem); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } } /* - * Indirect block might be removed by truncate while we were - * reading it. Handling of that case (forget what we've got and - * reread) is taken out of the main path. - */ - if (err == -EAGAIN) - goto changed; - - goal = 0; - down(&ei->truncate_sem); - - /* lazy initialize the block allocation info here if necessary */ - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) { + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary + */ + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext3_init_block_alloc_info(inode); - } - if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) { - up(&ei->truncate_sem); - goto changed; - } + goal = ext3_find_goal(inode, iblock, chain, partial); left = (chain + depth) - partial; @@ -771,38 +742,45 @@ out: * Block out ext3_truncate while we alter the tree */ err = ext3_alloc_branch(handle, inode, left, goal, - offsets+(partial-chain), partial); + offsets + (partial - chain), partial); - /* The ext3_splice_branch call will free and forget any buffers + /* + * The ext3_splice_branch call will free and forget any buffers * on the new chain if there is a failure, but that risks using * up transaction credits, especially for bitmaps where the * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct */ + * may need to return -EAGAIN upwards in the worst case. --sct + */ if (!err) err = ext3_splice_branch(handle, inode, iblock, chain, partial, left); - /* i_disksize growing is protected by truncate_sem - * don't forget to protect it if you're about to implement - * concurrent ext3_get_block() -bzzz */ + /* + * i_disksize growing is protected by truncate_sem. Don't forget to + * protect it if you're about to implement concurrent + * ext3_get_block() -bzzz + */ if (!err && extend_disksize && inode->i_size > ei->i_disksize) ei->i_disksize = inode->i_size; up(&ei->truncate_sem); - if (err == -EAGAIN) - goto changed; if (err) goto cleanup; set_buffer_new(bh_result); - goto got_it; - -changed: +got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (boundary) + set_buffer_boundary(bh_result); + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: while (partial > chain) { - jbd_debug(1, "buffer chain changed, retrying\n"); - BUFFER_TRACE(partial->bh, "brelsing"); + BUFFER_TRACE(partial->bh, "call brelse"); brelse(partial->bh); partial--; } - goto reread; + BUFFER_TRACE(bh_result, "returned"); +out: + return err; } static int ext3_get_block(struct inode *inode, sector_t iblock, diff --git a/fs/fcntl.c b/fs/fcntl.c index 3e7ab16ed15..286a9f8f3d4 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/signal.h> #include <asm/poll.h> #include <asm/siginfo.h> @@ -308,7 +309,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; case F_SETSIG: /* arg == 0 restores default behaviour. */ - if (arg < 0 || arg > _NSIG) { + if (!valid_signal(arg)) { break; } err = 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d6efb36cab2..8e050fa5821 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -512,7 +512,8 @@ restart: } /** - * sync_inodes + * sync_inodes - writes all inodes to disk + * @wait: wait for completion * * sync_inodes() goes through each super block's dirty inode list, writes the * inodes out, waits on the writeout and puts the inodes back on the normal @@ -604,6 +605,7 @@ EXPORT_SYMBOL(sync_inode); /** * generic_osync_inode - flush all dirty data for a given inode to disk * @inode: inode to write + * @mapping: the address_space that should be flushed * @what: what to write and wait upon * * This can be called by file_write functions for files which have the diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 4efb640c4d0..217e32f37e0 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -333,6 +333,8 @@ void hfs_mdb_close(struct super_block *sb) * Release the resources associated with the in-core MDB. */ void hfs_mdb_put(struct super_block *sb) { + if (!HFS_SB(sb)) + return; /* free the B-trees */ hfs_btree_close(HFS_SB(sb)->ext_tree); hfs_btree_close(HFS_SB(sb)->cat_tree); @@ -340,4 +342,7 @@ void hfs_mdb_put(struct super_block *sb) /* free the buffers holding the primary and alternate MDBs */ brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); + + kfree(HFS_SB(sb)); + sb->s_fs_info = NULL; } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 1e2c193134c..ab783f6afa3 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -297,7 +297,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) res = -EINVAL; if (!parse_options((char *)data, sbi)) { hfs_warn("hfs_fs: unable to parse mount options.\n"); - goto bail3; + goto bail; } sb->s_op = &hfs_super_operations; @@ -310,7 +310,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n", hfs_mdb_name(sb)); res = -EINVAL; - goto bail2; + goto bail; } /* try to get the root inode */ @@ -340,10 +340,8 @@ bail_iput: iput(root_inode); bail_no_root: hfs_warn("hfs_fs: get root inode failed.\n"); +bail: hfs_mdb_put(sb); -bail2: -bail3: - kfree(sbi); return res; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5f8044664a3..d55ad67b8e4 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -208,7 +208,9 @@ static void hfsplus_write_super(struct super_block *sb) static void hfsplus_put_super(struct super_block *sb) { dprint(DBG_SUPER, "hfsplus_put_super\n"); - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb->s_fs_info) + return; + if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; vhdr->modify_date = hfsp_now2mt(); @@ -226,6 +228,8 @@ static void hfsplus_put_super(struct super_block *sb) brelse(HFSPLUS_SB(sb).s_vhbh); if (HFSPLUS_SB(sb).nls) unload_nls(HFSPLUS_SB(sb).nls); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; } static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index a88ad292485..e6c63d9cac7 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -521,7 +521,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from, static struct address_space_operations hostfs_aops = { .writepage = hostfs_writepage, .readpage = hostfs_readpage, -/* .set_page_dirty = __set_page_dirty_nobuffers, */ + .set_page_dirty = __set_page_dirty_nobuffers, .prepare_write = hostfs_prepare_write, .commit_write = hostfs_commit_write }; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 0c607c1388f..771a554701d 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -79,8 +79,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT)); - if (!PageLocked(pg)) - PAGE_BUG(pg); + BUG_ON(!PageLocked(pg)); pg_buf = kmap(pg); /* FIXME: Can kmap fail? */ diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 7bc906677b0..24a689179af 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, { s64 lblock64 = lblock; int rc = 0; - int take_locks; xad_t xad; s64 xaddr; int xflag; - s32 xlen; - - /* - * If this is a special inode (imap, dmap) - * the lock should already be taken - */ - take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I); + s32 xlen = max_blocks; /* * Take appropriate lock on inode */ - if (take_locks) { - if (create) - IWRITE_LOCK(ip); - else - IREAD_LOCK(ip); - } + if (create) + IWRITE_LOCK(ip); + else + IREAD_LOCK(ip); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0) - == 0) && xlen) { + (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) /* @@ -238,7 +229,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, #ifdef _JFS_4K if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) goto unlock; - rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE); + rc = extAlloc(ip, xlen, lblock64, &xad, FALSE); if (rc) goto unlock; @@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, /* * Release lock on inode */ - if (take_locks) { - if (create) - IWRITE_UNLOCK(ip); - else - IREAD_UNLOCK(ip); - } + if (create) + IWRITE_UNLOCK(ip); + else + IREAD_UNLOCK(ip); return rc; } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index d86e467c6e4..69007fd546e 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap, struct metapage *mp; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; /* the blocks better be within the mapsize. */ if (blkno + nblocks > bmp->db_mapsize) { @@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap, 0); if (mp == NULL) return -EIO; + metapage_wait_for_io(mp); } dp = (struct dmap *) mp->data; @@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap, if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move bp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert bp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index e357890adfb..ac41f72d6d5 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -212,7 +212,7 @@ static struct metapage *read_index_page(struct inode *inode, s64 blkno) s32 xlen; rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xlen == 0)) + if (rc || (xaddr == 0)) return NULL; return read_metapage(inode, xaddr, PSIZE, 1); @@ -231,7 +231,7 @@ static struct metapage *get_index_page(struct inode *inode, s64 blkno) s32 xlen; rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xlen == 0)) + if (rc || (xaddr == 0)) return NULL; return get_metapage(inode, xaddr, PSIZE, 1); @@ -3181,7 +3181,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) d = (struct ldtentry *) & p->slot[stbl[i]]; if (((long) jfs_dirent + d->namlen + 1) > - (dirent_buf + PSIZE)) { + (dirent_buf + PAGE_SIZE)) { /* DBCS codepages could overrun dirent_buf */ index = i; overflow = 1; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 78383130162..7acff2ce3c8 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) } - ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->a_ops = &jfs_metapage_aops; mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); /* Allocations to metadata inodes should not affect quotas */ @@ -2573,9 +2573,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) goto out; } - /* assign a buffer for the page */ - mp = get_metapage(ipimap, xaddr, PSIZE, 1); - if (!mp) { + /* + * start transaction of update of the inode map + * addressing structure pointing to the new iag page; + */ + tid = txBegin(sb, COMMIT_FORCE); + down(&JFS_IP(ipimap)->commit_sem); + + /* update the inode map addressing structure to point to it */ + if ((rc = + xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { + txEnd(tid); + up(&JFS_IP(ipimap)->commit_sem); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ @@ -2584,6 +2593,29 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) /* release the inode map lock */ IWRITE_UNLOCK(ipimap); + goto out; + } + + /* update the inode map's inode to reflect the extension */ + ipimap->i_size += PSIZE; + inode_add_bytes(ipimap, PSIZE); + + /* assign a buffer for the page */ + mp = get_metapage(ipimap, blkno, PSIZE, 0); + if (!mp) { + /* + * This is very unlikely since we just created the + * extent, but let's try to handle it correctly + */ + xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, + COMMIT_PWMAP); + + txAbort(tid, 0); + txEnd(tid); + + /* release the inode map lock */ + IWRITE_UNLOCK(ipimap); + rc = -EIO; goto out; } @@ -2605,41 +2637,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) iagp->inosmap[i] = cpu_to_le32(ONES); /* - * Invalidate the page after writing and syncing it. - * After it's initialized, we access it in a different - * address space + * Write and sync the metapage */ - set_bit(META_discard, &mp->flag); flush_metapage(mp); /* - * start tyransaction of update of the inode map - * addressing structure pointing to the new iag page; - */ - tid = txBegin(sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); - - /* update the inode map addressing structure to point to it */ - if ((rc = - xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { - txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); - /* Free the blocks allocated for the iag since it was - * not successfully added to the inode map - */ - dbFree(ipimap, xaddr, (s64) xlen); - - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - goto out; - } - - /* update the inode map's inode to reflect the extension */ - ipimap->i_size += PSIZE; - inode_add_bytes(ipimap, PSIZE); - - /* * txCommit(COMMIT_FORCE) will synchronously write address * index pages and inode after commit in careful update order * of address index pages (right to left, bottom up); @@ -2789,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap, u32 mask; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; imap = JFS_IP(ipimap)->i_imap; /* get the iag number containing the inode */ @@ -2805,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap, IREAD_UNLOCK(ipimap); if (rc) return (rc); + metapage_wait_for_io(mp); iagp = (struct iag *) mp->data; /* get the inode number and extent number of the inode within * the iag and the inode number within the extent. @@ -2868,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap, /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); assert(mp->clsn); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } write_metapage(mp); return (0); diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index ebd77c1bed6..c0fd7b3eadc 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -165,6 +165,7 @@ struct jfs_sb_info { /* Formerly in ipbmap */ struct bmap *bmap; /* incore bmap descriptor */ struct nls_table *nls_tab; /* current codepage */ + struct inode *direct_inode; /* metadata inode */ uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index b6a6869ebb4..dfa1200daa6 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, int lsn; int diffp, difft; struct metapage *mp = NULL; + unsigned long flags; jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", log, tblk, lrd, tlck); @@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, */ lsn = log->lsn; - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); /* * initialize page lsn if first log write of the page @@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); /* * write the log record @@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return lsn; } - /* * NAME: lmWriteRecord() * @@ -927,9 +927,8 @@ static void lmPostGC(struct lbuf * bp) * calculate new value of i_nextsync which determines when * this code is called again. * - * this is called only from lmLog(). - * - * PARAMETER: ip - pointer to logs inode. + * PARAMETERS: log - log structure + * nosyncwait - 1 if called asynchronously * * RETURN: 0 * @@ -945,6 +944,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) struct lrd lrd; int lsn; struct logsyncblk *lp; + struct jfs_sb_info *sbi; + unsigned long flags; + + /* push dirty metapages out to disk */ + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_flush(sbi->ipbmap->i_mapping); + filemap_flush(sbi->ipimap->i_mapping); + filemap_flush(sbi->direct_inode->i_mapping); + } /* * forward syncpt @@ -954,10 +962,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) */ if (log->sync == log->syncpt) { - LOGSYNC_LOCK(log); - /* ToDo: push dirty metapages out to disk */ -// bmLogSync(log); - + LOGSYNC_LOCK(log, flags); if (list_empty(&log->synclist)) log->sync = log->lsn; else { @@ -965,7 +970,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) struct logsyncblk, synclist); log->sync = lp->lsn; } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } @@ -974,27 +979,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) * reset syncpt = sync */ if (log->sync != log->syncpt) { - struct jfs_sb_info *sbi; - - /* - * We need to make sure all of the "written" metapages - * actually make it to disk - */ - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawrite(sbi->ipbmap->i_mapping); - filemap_fdatawrite(sbi->ipimap->i_mapping); - filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping); - } - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawait(sbi->ipbmap->i_mapping); - filemap_fdatawait(sbi->ipimap->i_mapping); - filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping); - } - lrd.logtid = 0; lrd.backchain = 0; lrd.type = cpu_to_le16(LOG_SYNCPT); @@ -1066,6 +1050,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) return lsn; } +/* + * NAME: jfs_syncpt + * + * FUNCTION: write log SYNCPT record for specified log + * + * PARAMETERS: log - log structure + */ +void jfs_syncpt(struct jfs_log *log) +{ LOG_LOCK(log); + lmLogSync(log, 1); + LOG_UNLOCK(log); +} /* * NAME: lmLogOpen() @@ -1547,6 +1543,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) { int i; struct tblock *target = NULL; + struct jfs_sb_info *sbi; /* jfs_write_inode may call us during read-only mount */ if (!log) @@ -1608,12 +1605,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if (wait < 2) return; + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_fdatawrite(sbi->ipbmap->i_mapping); + filemap_fdatawrite(sbi->ipimap->i_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + } + /* * If there was recent activity, we may need to wait * for the lazycommit thread to catch up */ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { - for (i = 0; i < 800; i++) { /* Too much? */ + for (i = 0; i < 200; i++) { /* Too much? */ msleep(250); if (list_empty(&log->cqueue) && list_empty(&log->synclist)) @@ -1621,7 +1624,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait) } } assert(list_empty(&log->cqueue)); - assert(list_empty(&log->synclist)); + if (!list_empty(&log->synclist)) { + struct logsyncblk *lp; + + list_for_each_entry(lp, &log->synclist, synclist) { + if (lp->xflag & COMMIT_PAGE) { + struct metapage *mp = (struct metapage *)lp; + dump_mem("orphan metapage", lp, + sizeof(struct metapage)); + dump_mem("page", mp->page, sizeof(struct page)); + } + else + dump_mem("orphan tblock", lp, + sizeof(struct tblock)); + } +// current->state = TASK_INTERRUPTIBLE; +// schedule(); + } + //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } @@ -1669,6 +1689,7 @@ int lmLogShutdown(struct jfs_log * log) lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); lbmIOWait(log->bp, lbmFREE); + log->bp = NULL; /* * synchronous update log superblock @@ -1819,20 +1840,34 @@ static int lbmLogInit(struct jfs_log * log) log->lbuf_free = NULL; - for (i = 0; i < LOGPAGES; i++) { - lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); - if (lbuf == 0) - goto error; - lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL); - if (lbuf->l_ldata == 0) { - kfree(lbuf); + for (i = 0; i < LOGPAGES;) { + char *buffer; + uint offset; + struct page *page; + + buffer = (char *) get_zeroed_page(GFP_KERNEL); + if (buffer == NULL) goto error; + page = virt_to_page(buffer); + for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { + lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); + if (lbuf == NULL) { + if (offset == 0) + free_page((unsigned long) buffer); + goto error; + } + if (offset) /* we already have one reference */ + get_page(page); + lbuf->l_offset = offset; + lbuf->l_ldata = buffer + offset; + lbuf->l_page = page; + lbuf->l_log = log; + init_waitqueue_head(&lbuf->l_ioevent); + + lbuf->l_freelist = log->lbuf_free; + log->lbuf_free = lbuf; + i++; } - lbuf->l_log = log; - init_waitqueue_head(&lbuf->l_ioevent); - - lbuf->l_freelist = log->lbuf_free; - log->lbuf_free = lbuf; } return (0); @@ -1857,12 +1892,10 @@ static void lbmLogShutdown(struct jfs_log * log) lbuf = log->lbuf_free; while (lbuf) { struct lbuf *next = lbuf->l_freelist; - free_page((unsigned long) lbuf->l_ldata); + __free_page(lbuf->l_page); kfree(lbuf); lbuf = next; } - - log->bp = NULL; } @@ -1974,9 +2007,9 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); + bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = 0; + bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; @@ -2115,9 +2148,9 @@ static void lbmStartIO(struct lbuf * bp) bio = bio_alloc(GFP_NOFS, 1); bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); + bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = 0; + bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; @@ -2127,16 +2160,13 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_private = bp; /* check if journaling to disk has been disabled */ - if (!log->no_integrity) { + if (log->no_integrity) { + bio->bi_size = 0; + lbmIODone(bio, 0, 0); + } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); } - else { - bio->bi_size = 0; - lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0 - * 3rd argument appears to not be used => 0 - */ - } } diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 141ad74010c..51291fbc420 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -463,9 +463,10 @@ struct lbuf { s64 l_blkno; /* 8: log page block number */ caddr_t l_ldata; /* 4: data page */ + struct page *l_page; /* The page itself */ + uint l_offset; /* Offset of l_ldata within the page */ wait_queue_head_t l_ioevent; /* 4: i/o done event */ - struct page *l_page; /* The page itself */ }; /* Reuse l_freelist for redrive list */ @@ -489,8 +490,9 @@ struct logsyncblk { */ #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) -#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) -#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) +#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) +#define LOGSYNC_UNLOCK(log, flags) \ + spin_unlock_irqrestore(&(log)->synclock, flags) /* compute the difference in bytes of lsn from sync point */ #define logdiff(diff, lsn, log)\ @@ -506,5 +508,6 @@ extern int lmLogShutdown(struct jfs_log * log); extern int lmLogInit(struct jfs_log * log); extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); extern void jfs_flush_journal(struct jfs_log * log, int wait); +extern void jfs_syncpt(struct jfs_log *log); #endif /* _H_JFS_LOGMGR */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 4c0a3ac75c0..41bf078dce0 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -1,5 +1,5 @@ /* - * Copyright (C) International Business Machines Corp., 2000-2003 + * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify @@ -18,10 +18,11 @@ */ #include <linux/fs.h> +#include <linux/mm.h> +#include <linux/bio.h> #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/mempool.h> -#include <linux/delay.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -29,8 +30,6 @@ #include "jfs_txnmgr.h" #include "jfs_debug.h" -static DEFINE_SPINLOCK(meta_lock); - #ifdef CONFIG_JFS_STATISTICS static struct { uint pagealloc; /* # of page allocations */ @@ -39,22 +38,8 @@ static struct { } mpStat; #endif - -#define HASH_BITS 10 /* This makes hash_table 1 4K page */ -#define HASH_SIZE (1 << HASH_BITS) -static struct metapage **hash_table = NULL; -static unsigned long hash_order; - - -static inline int metapage_locked(struct metapage *mp) -{ - return test_bit(META_locked, &mp->flag); -} - -static inline int trylock_metapage(struct metapage *mp) -{ - return test_and_set_bit(META_locked, &mp->flag); -} +#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) +#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) static inline void unlock_metapage(struct metapage *mp) { @@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp) wake_up(&mp->wait); } -static void __lock_metapage(struct metapage *mp) +static inline void __lock_metapage(struct metapage *mp) { DECLARE_WAITQUEUE(wait, current); - INCREMENT(mpStat.lockwait); - add_wait_queue_exclusive(&mp->wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (metapage_locked(mp)) { - spin_unlock(&meta_lock); + unlock_page(mp->page); schedule(); - spin_lock(&meta_lock); + lock_page(mp->page); } } while (trylock_metapage(mp)); __set_current_state(TASK_RUNNING); remove_wait_queue(&mp->wait, &wait); } -/* needs meta_lock */ +/* + * Must have mp->page locked + */ static inline void lock_metapage(struct metapage *mp) { if (trylock_metapage(mp)) @@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp) static kmem_cache_t *metapage_cache; static mempool_t *metapage_mempool; +#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) + +#if MPS_PER_PAGE > 1 + +struct meta_anchor { + int mp_count; + atomic_t io_count; + struct metapage *mp[MPS_PER_PAGE]; +}; +#define mp_anchor(page) ((struct meta_anchor *)page->private) + +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + if (!PagePrivate(page)) + return NULL; + return mp_anchor(page)->mp[offset >> L2PSIZE]; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a; + int index; + int l2mp_blocks; /* log2 blocks per metapage */ + + if (PagePrivate(page)) + a = mp_anchor(page); + else { + a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + if (!a) + return -ENOMEM; + memset(a, 0, sizeof(struct meta_anchor)); + page->private = (unsigned long)a; + SetPagePrivate(page); + kmap(page); + } + + if (mp) { + l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + a->mp_count++; + a->mp[index] = mp; + } + + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a = mp_anchor(page); + int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + int index; + + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + + BUG_ON(a->mp[index] != mp); + + a->mp[index] = NULL; + if (--a->mp_count == 0) { + kfree(a); + page->private = 0; + ClearPagePrivate(page); + kunmap(page); + } +} + +static inline void inc_io(struct page *page) +{ + atomic_inc(&mp_anchor(page)->io_count); +} + +static inline void dec_io(struct page *page, void (*handler) (struct page *)) +{ + if (atomic_dec_and_test(&mp_anchor(page)->io_count)) + handler(page); +} + +#else +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + return PagePrivate(page) ? (struct metapage *)page->private : NULL; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + if (mp) { + page->private = (unsigned long)mp; + SetPagePrivate(page); + kmap(page); + } + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + page->private = 0; + ClearPagePrivate(page); + kunmap(page); +} + +#define inc_io(page) do {} while(0) +#define dec_io(page, handler) handler(page) + +#endif + static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; @@ -139,16 +228,6 @@ int __init metapage_init(void) kmem_cache_destroy(metapage_cache); return -ENOMEM; } - /* - * Now the hash list - */ - for (hash_order = 0; - ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; - hash_order++); - hash_table = - (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order); - assert(hash_table); - memset(hash_table, 0, PAGE_SIZE << hash_order); return 0; } @@ -159,73 +238,388 @@ void metapage_exit(void) kmem_cache_destroy(metapage_cache); } +static inline void drop_metapage(struct page *page, struct metapage *mp) +{ + if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) || + test_bit(META_io, &mp->flag)) + return; + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); +} + /* - * Basically same hash as in pagemap.h, but using our hash table + * Metapage address space operations */ -static struct metapage **meta_hash(struct address_space *mapping, - unsigned long index) + +static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, + unsigned int *len) { -#define i (((unsigned long)mapping)/ \ - (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) -#define s(x) ((x) + ((x) >> HASH_BITS)) - return hash_table + (s(i + index) & (HASH_SIZE - 1)); -#undef i -#undef s + int rc = 0; + int xflag; + s64 xaddr; + sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + inode->i_blkbits; + + if (lblock >= file_blocks) + return 0; + if (lblock + *len > file_blocks) + *len = file_blocks - lblock; + + if (inode->i_ino) { + rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0); + if ((rc == 0) && *len) + lblock = (sector_t)xaddr; + else + lblock = 0; + } /* else no mapping */ + + return lblock; } -static struct metapage *search_hash(struct metapage ** hash_ptr, - struct address_space *mapping, - unsigned long index) +static void last_read_complete(struct page *page) { - struct metapage *ptr; + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); +} + +static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, + int err) +{ + struct page *page = bio->bi_private; + + if (bio->bi_size) + return 1; - for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { - if ((ptr->mapping == mapping) && (ptr->index == index)) - return ptr; + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_read_end_io: I/O error\n"); + SetPageError(page); } - return NULL; + dec_io(page, last_read_complete); + bio_put(bio); + + return 0; } -static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) +static void remove_from_logsync(struct metapage *mp) { - if (*hash_ptr) - (*hash_ptr)->hash_prev = mp; + struct jfs_log *log = mp->log; + unsigned long flags; +/* + * This can race. Recheck that log hasn't been set to null, and after + * acquiring logsync lock, recheck lsn + */ + if (!log) + return; + + LOGSYNC_LOCK(log, flags); + if (mp->lsn) { + mp->log = NULL; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del(&mp->synclist); + } + LOGSYNC_UNLOCK(log, flags); +} - mp->hash_prev = NULL; - mp->hash_next = *hash_ptr; - *hash_ptr = mp; +static void last_write_complete(struct page *page) +{ + struct metapage *mp; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (mp && test_bit(META_io, &mp->flag)) { + if (mp->lsn) + remove_from_logsync(mp); + clear_bit(META_io, &mp->flag); + } + /* + * I'd like to call drop_metapage here, but I don't think it's + * safe unless I have the page locked + */ + } + end_page_writeback(page); } -static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) +static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done, + int err) { - if (mp->hash_prev) - mp->hash_prev->hash_next = mp->hash_next; - else { - assert(*hash_ptr == mp); - *hash_ptr = mp->hash_next; + struct page *page = bio->bi_private; + + BUG_ON(!PagePrivate(page)); + + if (bio->bi_size) + return 1; + + if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_write_end_io: I/O error\n"); + SetPageError(page); + } + dec_io(page, last_write_complete); + bio_put(bio); + return 0; +} + +static int metapage_writepage(struct page *page, struct writeback_control *wbc) +{ + struct bio *bio = NULL; + unsigned int block_offset; /* block offset of mp within page */ + struct inode *inode = page->mapping->host; + unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; + unsigned int len; + unsigned int xlen; + struct metapage *mp; + int redirty = 0; + sector_t lblock; + sector_t pblock; + sector_t next_block = 0; + sector_t page_start; + unsigned long bio_bytes = 0; + unsigned long bio_offset = 0; + unsigned int offset; + + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp || !test_bit(META_dirty, &mp->flag)) + continue; + + if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) { + redirty = 1; + continue; + } + + clear_bit(META_dirty, &mp->flag); + block_offset = offset >> inode->i_blkbits; + lblock = page_start + block_offset; + if (bio) { + if (xlen && lblock == next_block) { + /* Contiguous, in memory & on disk */ + len = min(xlen, blocks_per_mp); + xlen -= len; + bio_bytes += len << inode->i_blkbits; + set_bit(META_io, &mp->flag); + continue; + } + /* Not contiguous */ + if (bio_add_page(bio, page, bio_bytes, bio_offset) < + bio_bytes) + goto add_failed; + /* + * Increment counter before submitting i/o to keep + * count from hitting zero before we're through + */ + inc_io(page); + if (!bio->bi_size) + goto dump_bio; + submit_bio(WRITE, bio); + bio = NULL; + } else { + set_page_writeback(page); + inc_io(page); + } + xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; + pblock = metapage_get_blocks(inode, lblock, &xlen); + if (!pblock) { + /* Need better error handling */ + printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); + dec_io(page, last_write_complete); + continue; + } + set_bit(META_io, &mp->flag); + len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_write_end_io; + bio->bi_private = page; + + /* Don't call bio_add_page yet, we may add to this vec */ + bio_offset = offset; + bio_bytes = len << inode->i_blkbits; + + xlen -= len; + next_block = lblock + len; + } + if (bio) { + if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) + goto add_failed; + if (!bio->bi_size) + goto dump_bio; + + submit_bio(WRITE, bio); + } + if (redirty) + redirty_page_for_writepage(wbc, page); + + unlock_page(page); + + return 0; +add_failed: + /* We should never reach here, since we're only adding one vec */ + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + goto skip; +dump_bio: + dump_mem("bio", bio, sizeof(*bio)); +skip: + bio_put(bio); + unlock_page(page); + dec_io(page, last_write_complete); + + return -EIO; +} + +static int metapage_readpage(struct file *fp, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct bio *bio = NULL; + unsigned int block_offset; + unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; + sector_t page_start; /* address of page in fs blocks */ + sector_t pblock; + unsigned int xlen; + unsigned int len; + unsigned int offset; + + BUG_ON(!PageLocked(page)); + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + + block_offset = 0; + while (block_offset < blocks_per_page) { + xlen = blocks_per_page - block_offset; + pblock = metapage_get_blocks(inode, page_start + block_offset, + &xlen); + if (pblock) { + if (!PagePrivate(page)) + insert_metapage(page, NULL); + inc_io(page); + if (bio) + submit_bio(READ, bio); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_read_end_io; + bio->bi_private = page; + len = xlen << inode->i_blkbits; + offset = block_offset << inode->i_blkbits; + if (bio_add_page(bio, page, len, offset) < len) + goto add_failed; + block_offset += xlen; + } else + block_offset++; } + if (bio) + submit_bio(READ, bio); + else + unlock_page(page); + + return 0; - if (mp->hash_next) - mp->hash_next->hash_prev = mp->hash_prev; +add_failed: + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + bio_put(bio); + dec_io(page, last_read_complete); + return -EIO; } +static int metapage_releasepage(struct page *page, int gfp_mask) +{ + struct metapage *mp; + int busy = 0; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp) + continue; + + jfs_info("metapage_releasepage: mp = 0x%p", mp); + if (mp->count || mp->nohomeok) { + jfs_info("count = %ld, nohomeok = %d", mp->count, + mp->nohomeok); + busy = 1; + continue; + } + wait_on_page_writeback(page); + //WARN_ON(test_bit(META_dirty, &mp->flag)); + if (test_bit(META_dirty, &mp->flag)) { + dump_mem("dirty mp in metapage_releasepage", mp, + sizeof(struct metapage)); + dump_mem("page", page, sizeof(struct page)); + dump_stack(); + } + WARN_ON(mp->lsn); + if (mp->lsn) + remove_from_logsync(mp); + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); + } + if (busy) + return -1; + + return 0; +} + +static int metapage_invalidatepage(struct page *page, unsigned long offset) +{ + BUG_ON(offset); + + if (PageWriteback(page)) + return 0; + + return metapage_releasepage(page, 0); +} + +struct address_space_operations jfs_metapage_aops = { + .readpage = metapage_readpage, + .writepage = metapage_writepage, + .sync_page = block_sync_page, + .releasepage = metapage_releasepage, + .invalidatepage = metapage_invalidatepage, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, unsigned int size, int absolute, unsigned long new) { - struct metapage **hash_ptr; int l2BlocksPerPage; int l2bsize; struct address_space *mapping; - struct metapage *mp; + struct metapage *mp = NULL; + struct page *page; unsigned long page_index; unsigned long page_offset; - jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); - + jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d", + inode->i_ino, lblock, absolute); + + l2bsize = inode->i_blkbits; + l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + page_index = lblock >> l2BlocksPerPage; + page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; + if ((page_offset + size) > PAGE_CACHE_SIZE) { + jfs_err("MetaData crosses page boundary!!"); + jfs_err("lblock = %lx, size = %d", lblock, size); + dump_stack(); + return NULL; + } if (absolute) - mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; + mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping; else { /* * If an nfs client tries to read an inode that is larger @@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, mapping = inode->i_mapping; } - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); + if (new && (PSIZE == PAGE_CACHE_SIZE)) { + page = grab_cache_page(mapping, page_index); + if (!page) { + jfs_err("grab_cache_page failed!"); + return NULL; + } + SetPageUptodate(page); + } else { + page = read_cache_page(mapping, page_index, + (filler_t *)mapping->a_ops->readpage, NULL); + if (IS_ERR(page)) { + jfs_err("read_cache_page failed!"); + return NULL; + } + lock_page(page); + } + + mp = page_to_mp(page, page_offset); if (mp) { - page_found: - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; + if (mp->logical_size != size) { + jfs_error(inode->i_sb, + "__get_metapage: mp->logical_size != size"); + jfs_err("logical_size = %d, size = %d", + mp->logical_size, size); + dump_stack(); + goto unlock; } mp->count++; lock_metapage(mp); - spin_unlock(&meta_lock); if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, "__get_metapage: using a " "discarded metapage"); - release_metapage(mp); - return NULL; + discard_metapage(mp); + goto unlock; } clear_bit(META_discard, &mp->flag); } - jfs_info("__get_metapage: found 0x%p, in hash", mp); - if (mp->logical_size != size) { - jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); - release_metapage(mp); - return NULL; - } } else { - l2bsize = inode->i_blkbits; - l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - page_index = lblock >> l2BlocksPerPage; - page_offset = (lblock - (page_index << l2BlocksPerPage)) << - l2bsize; - if ((page_offset + size) > PAGE_CACHE_SIZE) { - spin_unlock(&meta_lock); - jfs_err("MetaData crosses page boundary!!"); - return NULL; - } - - /* - * Locks held on aggregate inode pages are usually - * not held long, and they are taken in critical code - * paths (committing dirty inodes, txCommit thread) - * - * Attempt to get metapage without blocking, tapping into - * reserves if necessary. - */ - mp = NULL; - if (JFS_IP(inode)->fileset == AGGREGATE_I) { - mp = alloc_metapage(GFP_ATOMIC); - if (!mp) { - /* - * mempool is supposed to protect us from - * failing here. We will try a blocking - * call, but a deadlock is possible here - */ - printk(KERN_WARNING - "__get_metapage: atomic call to mempool_alloc failed.\n"); - printk(KERN_WARNING - "Will attempt blocking call\n"); - } - } - if (!mp) { - struct metapage *mp2; - - spin_unlock(&meta_lock); - mp = alloc_metapage(GFP_NOFS); - spin_lock(&meta_lock); - - /* we dropped the meta_lock, we need to search the - * hash again. - */ - mp2 = search_hash(hash_ptr, mapping, lblock); - if (mp2) { - free_metapage(mp); - mp = mp2; - goto page_found; - } - } + INCREMENT(mpStat.pagealloc); + mp = alloc_metapage(GFP_NOFS); + mp->page = page; mp->flag = 0; - lock_metapage(mp); - if (absolute) - set_bit(META_absolute, &mp->flag); mp->xflag = COMMIT_PAGE; mp->count = 1; - atomic_set(&mp->nohomeok,0); - mp->mapping = mapping; - mp->index = lblock; - mp->page = NULL; + mp->nohomeok = 0; mp->logical_size = size; - add_to_hash(mp, hash_ptr); - spin_unlock(&meta_lock); - - if (new) { - jfs_info("__get_metapage: Calling grab_cache_page"); - mp->page = grab_cache_page(mapping, page_index); - if (!mp->page) { - jfs_err("grab_cache_page failed!"); - goto freeit; - } else { - INCREMENT(mpStat.pagealloc); - unlock_page(mp->page); - } - } else { - jfs_info("__get_metapage: Calling read_cache_page"); - mp->page = read_cache_page(mapping, lblock, - (filler_t *)mapping->a_ops->readpage, NULL); - if (IS_ERR(mp->page)) { - jfs_err("read_cache_page failed!"); - goto freeit; - } else - INCREMENT(mpStat.pagealloc); + mp->data = page_address(page) + page_offset; + mp->index = lblock; + if (unlikely(insert_metapage(page, mp))) { + free_metapage(mp); + goto unlock; } - mp->data = kmap(mp->page) + page_offset; + lock_metapage(mp); } - if (new) + if (new) { + jfs_info("zeroing mp = 0x%p", mp); memset(mp->data, 0, PSIZE); + } - jfs_info("__get_metapage: returning = 0x%p", mp); + unlock_page(page); + jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data); return mp; -freeit: - spin_lock(&meta_lock); - remove_from_hash(mp, hash_ptr); - free_metapage(mp); - spin_unlock(&meta_lock); +unlock: + unlock_page(page); return NULL; } -void hold_metapage(struct metapage * mp, int force) +void grab_metapage(struct metapage * mp) { - spin_lock(&meta_lock); - + jfs_info("grab_metapage: mp = 0x%p", mp); + page_cache_get(mp->page); + lock_page(mp->page); mp->count++; - - if (force) { - ASSERT (!(test_bit(META_forced, &mp->flag))); - if (trylock_metapage(mp)) - set_bit(META_forced, &mp->flag); - } else - lock_metapage(mp); - - spin_unlock(&meta_lock); + lock_metapage(mp); + unlock_page(mp->page); } -static void __write_metapage(struct metapage * mp) +void force_metapage(struct metapage *mp) { - int l2bsize = mp->mapping->host->i_blkbits; - int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - unsigned long page_index; - unsigned long page_offset; - int rc; - - jfs_info("__write_metapage: mp = 0x%p", mp); - - page_index = mp->page->index; - page_offset = - (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + struct page *page = mp->page; + jfs_info("force_metapage: mp = 0x%p", mp); + set_bit(META_forcewrite, &mp->flag); + clear_bit(META_sync, &mp->flag); + page_cache_get(page); + lock_page(page); + set_page_dirty(page); + write_one_page(page, 1); + clear_bit(META_forcewrite, &mp->flag); + page_cache_release(page); +} +extern void hold_metapage(struct metapage *mp) +{ lock_page(mp->page); - rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("prepare_write return %d!", rc); - ClearPageUptodate(mp->page); +} + +extern void put_metapage(struct metapage *mp) +{ + if (mp->count || mp->nohomeok) { + /* Someone else will release this */ unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); return; } - rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("commit_write returned %d", rc); - } - + page_cache_get(mp->page); + mp->count++; + lock_metapage(mp); unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); - - jfs_info("__write_metapage done"); -} - -static inline void sync_metapage(struct metapage *mp) -{ - struct page *page = mp->page; - - page_cache_get(page); - lock_page(page); - - /* we're done with this page - no need to check for errors */ - if (page_has_buffers(page)) - write_one_page(page, 1); - else - unlock_page(page); - page_cache_release(page); + release_metapage(mp); } void release_metapage(struct metapage * mp) { - struct jfs_log *log; - + struct page *page = mp->page; jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); - spin_lock(&meta_lock); - if (test_bit(META_forced, &mp->flag)) { - clear_bit(META_forced, &mp->flag); - mp->count--; - spin_unlock(&meta_lock); - return; - } + BUG_ON(!page); + + lock_page(page); + unlock_metapage(mp); assert(mp->count); - if (--mp->count || atomic_read(&mp->nohomeok)) { - unlock_metapage(mp); - spin_unlock(&meta_lock); + if (--mp->count || mp->nohomeok) { + unlock_page(page); + page_cache_release(page); return; } - if (mp->page) { - set_bit(META_stale, &mp->flag); - spin_unlock(&meta_lock); - kunmap(mp->page); - mp->data = NULL; - if (test_bit(META_dirty, &mp->flag)) - __write_metapage(mp); + if (test_bit(META_dirty, &mp->flag)) { + set_page_dirty(page); if (test_bit(META_sync, &mp->flag)) { - sync_metapage(mp); clear_bit(META_sync, &mp->flag); + write_one_page(page, 1); + lock_page(page); /* write_one_page unlocks the page */ } + } else if (mp->lsn) /* discard_metapage doesn't remove it */ + remove_from_logsync(mp); - if (test_bit(META_discard, &mp->flag)) { - lock_page(mp->page); - block_invalidatepage(mp->page, 0); - unlock_page(mp->page); - } - - page_cache_release(mp->page); - mp->page = NULL; - INCREMENT(mpStat.pagefree); - spin_lock(&meta_lock); - } +#if MPS_PER_PAGE == 1 + /* + * If we know this is the only thing in the page, we can throw + * the page out of the page cache. If pages are larger, we + * don't want to do this. + */ - if (mp->lsn) { - /* - * Remove metapage from logsynclist. - */ - log = mp->log; - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del(&mp->synclist); - LOGSYNC_UNLOCK(log); + /* Retest mp->count since we may have released page lock */ + if (test_bit(META_discard, &mp->flag) && !mp->count) { + clear_page_dirty(page); + ClearPageUptodate(page); +#ifdef _NOT_YET + if (page->mapping) { + /* Remove from page cache and page cache reference */ + remove_from_page_cache(page); + page_cache_release(page); + metapage_releasepage(page, 0); + } +#endif } - remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); - spin_unlock(&meta_lock); - - free_metapage(mp); +#else + /* Try to keep metapages from using up too much memory */ + drop_metapage(page, mp); +#endif + unlock_page(page); + page_cache_release(page); } void __invalidate_metapages(struct inode *ip, s64 addr, int len) { - struct metapage **hash_ptr; - unsigned long lblock; + sector_t lblock; int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; + int BlocksPerPage = 1 << l2BlocksPerPage; /* All callers are interested in block device's mapping */ - struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; + struct address_space *mapping = + JFS_SBI(ip->i_sb)->direct_inode->i_mapping; struct metapage *mp; struct page *page; + unsigned int offset; /* - * First, mark metapages to discard. They will eventually be + * Mark metapages to discard. They will eventually be * released, but should not be written. */ - for (lblock = addr; lblock < addr + len; - lblock += 1 << l2BlocksPerPage) { - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); - if (mp) { - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; - } + for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len; + lblock += BlocksPerPage) { + page = find_lock_page(mapping, lblock >> l2BlocksPerPage); + if (!page) + continue; + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (!mp) + continue; + if (mp->index < addr) + continue; + if (mp->index >= addr + len) + break; clear_bit(META_dirty, &mp->flag); set_bit(META_discard, &mp->flag); - spin_unlock(&meta_lock); - } else { - spin_unlock(&meta_lock); - page = find_lock_page(mapping, lblock>>l2BlocksPerPage); - if (page) { - block_invalidatepage(page, 0); - unlock_page(page); - page_cache_release(page); - } + if (mp->lsn) + remove_from_logsync(mp); } + unlock_page(page); + page_cache_release(page); } } diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 0e58aba58c3..991e9fb84c7 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -33,38 +33,27 @@ struct metapage { unsigned long flag; /* See Below */ unsigned long count; /* Reference count */ void *data; /* Data pointer */ - - /* list management stuff */ - struct metapage *hash_prev; - struct metapage *hash_next; /* Also used for free list */ - - /* - * mapping & index become redundant, but we need these here to - * add the metapage to the hash before we have the real page - */ - struct address_space *mapping; - unsigned long index; + sector_t index; /* block address of page */ wait_queue_head_t wait; /* implementation */ struct page *page; - unsigned long logical_size; + unsigned int logical_size; /* Journal management */ int clsn; - atomic_t nohomeok; + int nohomeok; struct jfs_log *log; }; /* metapage flag */ #define META_locked 0 -#define META_absolute 1 -#define META_free 2 -#define META_dirty 3 -#define META_sync 4 -#define META_discard 5 -#define META_forced 6 -#define META_stale 7 +#define META_free 1 +#define META_dirty 2 +#define META_sync 3 +#define META_discard 4 +#define META_forcewrite 5 +#define META_io 6 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) @@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(struct inode *inode, __get_metapage(inode, lblock, size, absolute, TRUE) extern void release_metapage(struct metapage *); -extern void hold_metapage(struct metapage *, int); +extern void grab_metapage(struct metapage *); +extern void force_metapage(struct metapage *); + +/* + * hold_metapage and put_metapage are used in conjuction. The page lock + * is not dropped between the two, so no other threads can get or release + * the metapage + */ +extern void hold_metapage(struct metapage *); +extern void put_metapage(struct metapage *); static inline void write_metapage(struct metapage *mp) { @@ -101,6 +99,46 @@ static inline void discard_metapage(struct metapage *mp) release_metapage(mp); } +static inline void metapage_nohomeok(struct metapage *mp) +{ + struct page *page = mp->page; + lock_page(page); + if (!mp->nohomeok++) { + mark_metapage_dirty(mp); + page_cache_get(page); + wait_on_page_writeback(page); + } + unlock_page(page); +} + +/* + * This serializes access to mp->lsn when metapages are added to logsynclist + * without setting nohomeok. i.e. updating imap & dmap + */ +static inline void metapage_wait_for_io(struct metapage *mp) +{ + if (test_bit(META_io, &mp->flag)) + wait_on_page_writeback(mp->page); +} + +/* + * This is called when already holding the metapage + */ +static inline void _metapage_homeok(struct metapage *mp) +{ + if (!--mp->nohomeok) + page_cache_release(mp->page); +} + +static inline void metapage_homeok(struct metapage *mp) +{ + hold_metapage(mp); + _metapage_homeok(mp); + put_metapage(mp); +} + +extern struct address_space_operations jfs_metapage_aops; + /* * This routines invalidate all pages for an extent. */ diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index c535ffd638e..032d111bc33 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, int remount) */ logMOUNT(sb); - /* - * Set page cache allocation policy - */ - mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS); - return rc; } diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f40301d93f7..e93d01aa12c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -227,6 +227,7 @@ static lid_t txLockAlloc(void) static void txLockFree(lid_t lid) { + TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; @@ -566,9 +567,6 @@ void txEnd(tid_t tid) * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag)) { - /* forward log syncpt */ - /* lmSync(log); */ - jfs_info("log barrier off: 0x%x", log->lsn); /* enable new transactions start */ @@ -576,15 +574,22 @@ void txEnd(tid_t tid) /* wakeup all waitors for logsync barrier */ TXN_WAKEUP(&log->syncwait); + + TXN_UNLOCK(); + + /* forward log syncpt */ + jfs_syncpt(log); + + goto wakeup; } } + TXN_UNLOCK(); +wakeup: /* * wakeup all waitors for a free tblock */ TXN_WAKEUP(&TxAnchor.freewait); - - TXN_UNLOCK(); } @@ -633,8 +638,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, /* is page locked by the requester transaction ? */ tlck = lid_to_tlock(lid); - if ((xtid = tlck->tid) == tid) + if ((xtid = tlck->tid) == tid) { + TXN_UNLOCK(); goto grantLock; + } /* * is page locked by anonymous transaction/lock ? @@ -649,6 +656,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, */ if (xtid == 0) { tlck->tid = tid; + TXN_UNLOCK(); tblk = tid_to_tblock(tid); /* * The order of the tlocks in the transaction is important @@ -706,17 +714,18 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, */ tlck->tid = tid; + TXN_UNLOCK(); + /* mark tlock for meta-data page */ if (mp->xflag & COMMIT_PAGE) { tlck->flag = tlckPAGELOCK; /* mark the page dirty and nohomeok */ - mark_metapage_dirty(mp); - atomic_inc(&mp->nohomeok); + metapage_nohomeok(mp); jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", - mp, atomic_read(&mp->nohomeok), tid, tlck); + mp, mp->nohomeok, tid, tlck); /* if anonymous transaction, and buffer is on the group * commit synclist, mark inode to show this. This will @@ -762,8 +771,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; + TXN_LOCK(); list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); + TXN_UNLOCK(); } } @@ -821,8 +832,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, grantLock: tlck->type |= type; - TXN_UNLOCK(); - return tlck; /* @@ -841,11 +850,19 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, BUG(); } INCREMENT(stattx.waitlock); /* statistics */ + TXN_UNLOCK(); release_metapage(mp); + TXN_LOCK(); + xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); - TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + + /* Recheck everything since dropping TXN_LOCK */ + if (xtid && (tlck->mp == mp) && (mp->lid == lid)) + TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + else + TXN_UNLOCK(); jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); return NULL; @@ -906,6 +923,7 @@ static void txUnlock(struct tblock * tblk) struct metapage *mp; struct jfs_log *log; int difft, diffp; + unsigned long flags; jfs_info("txUnlock: tblk = 0x%p", tblk); log = JFS_SBI(tblk->sb)->log; @@ -925,19 +943,14 @@ static void txUnlock(struct tblock * tblk) assert(mp->xflag & COMMIT_PAGE); /* hold buffer - * - * It's possible that someone else has the metapage. - * The only things were changing are nohomeok, which - * is handled atomically, and clsn which is protected - * by the LOGSYNC_LOCK. */ - hold_metapage(mp, 1); + hold_metapage(mp); - assert(atomic_read(&mp->nohomeok) > 0); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok > 0); + _metapage_homeok(mp); /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); if (mp->clsn) { logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); @@ -945,16 +958,11 @@ static void txUnlock(struct tblock * tblk) mp->clsn = tblk->clsn; } else mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); assert(!(tlck->flag & tlckFREEPAGE)); - if (tlck->flag & tlckWRITEPAGE) { - write_metapage(mp); - } else { - /* release page which has been forced */ - release_metapage(mp); - } + put_metapage(mp); } /* insert tlock, and linelock(s) of the tlock if any, @@ -981,10 +989,10 @@ static void txUnlock(struct tblock * tblk) * has been inserted in logsync list at txUpdateMap()) */ if (tblk->lsn) { - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count--; list_del(&tblk->synclist); - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } @@ -1573,8 +1581,8 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * the last entry, so don't bother logging this */ mp->lid = 0; - hold_metapage(mp, 0); - atomic_dec(&mp->nohomeok); + grab_metapage(mp); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return 0; @@ -1712,7 +1720,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct maplock *maplock; struct xdlistlock *xadlock; struct pxd_lock *pxdlock; - pxd_t *pxd; + pxd_t *page_pxd; int next, lwm, hwm; ip = tlck->ip; @@ -1722,7 +1730,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); - pxd = &lrd->log.redopage.pxd; + page_pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); @@ -1752,9 +1760,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *pxd = mp->cm_pxd; - PXDaddress(pxd, mp->index); - PXDlength(pxd, +// *page_pxd = mp->cm_pxd; + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); @@ -1776,25 +1784,31 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; - if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; + pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. + * + * We can fit twice as may pxd's as xads in the lock */ - xadlock->xdlist = &xtlck->pxdlock; - memcpy(xadlock->xdlist, &p->xad[lwm], - sizeof(xad_t) * xadlock->count); - - for (i = 0; i < xadlock->count; i++) + xadlock->flag = mlckALLOCPXDLIST; + pxd = xadlock->xdlist = &xtlck->pxdlock; + for (i = 0; i < xadlock->count; i++) { + PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); + PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); p->xad[lwm + i].flag &= ~(XAD_NEW | XAD_EXTENDED); + pxd++; + } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ + xadlock->flag = mlckALLOCXADLIST; xadlock->xdlist = &p->xad[lwm]; tblk->xflag &= ~COMMIT_LAZY; } @@ -1836,8 +1850,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, if (tblk->xflag & COMMIT_TRUNCATE) { /* write NOREDOPAGE for the page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, mp->logical_size >> tblk->sb-> s_blocksize_bits); lrd->backchain = @@ -1872,22 +1886,32 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * deleted page itself; */ tlck->flag |= tlckUPDATEMAP; - xadlock->flag = mlckFREEXADLIST; xadlock->count = hwm - XTENTRYSTART + 1; - if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { + int i; + pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. + * + * We can fit twice as may pxd's as xads in the lock */ - xadlock->xdlist = &xtlck->pxdlock; - memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART], - sizeof(xad_t) * xadlock->count); + xadlock->flag = mlckFREEPXDLIST; + pxd = xadlock->xdlist = &xtlck->pxdlock; + for (i = 0; i < xadlock->count; i++) { + PXDaddress(pxd, + addressXAD(&p->xad[XTENTRYSTART + i])); + PXDlength(pxd, + lengthXAD(&p->xad[XTENTRYSTART + i])); + pxd++; + } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ + xadlock->flag = mlckFREEXADLIST; xadlock->xdlist = &p->xad[XTENTRYSTART]; tblk->xflag &= ~COMMIT_LAZY; } @@ -1918,7 +1942,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * header ? */ if (tlck->type & tlckTRUNCATE) { - pxd_t tpxd; /* truncated extent of xad */ + pxd_t pxd; /* truncated extent of xad */ int twm; /* @@ -1947,8 +1971,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* @@ -1966,7 +1991,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; - tpxd = pxdlock->pxd; /* save to format maplock */ + pxd = pxdlock->pxd; /* save to format maplock */ lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); } @@ -2035,7 +2060,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxdlock = (struct pxd_lock *) xadlock; pxdlock->flag = mlckFREEPXD; pxdlock->count = 1; - pxdlock->pxd = tpxd; + pxdlock->pxd = pxd; jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " "hwm:%d", ip, mp, pxdlock->count, hwm); @@ -2253,7 +2278,8 @@ void txForce(struct tblock * tblk) tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ - + force_metapage(mp); +#if 0 /* * The "right" thing to do here is to * synchronously write the metadata. @@ -2265,9 +2291,10 @@ void txForce(struct tblock * tblk) * we can get by with synchronously writing * the pages when they are released. */ - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); +#endif } } } @@ -2327,7 +2354,7 @@ static void txUpdateMap(struct tblock * tblk) */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); - hold_metapage(mp, 0); + grab_metapage(mp); } /* @@ -2377,8 +2404,8 @@ static void txUpdateMap(struct tblock * tblk) ASSERT(mp->lid == lid); tlck->mp->lid = 0; } - assert(atomic_read(&mp->nohomeok) == 1); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok == 1); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } @@ -2844,24 +2871,9 @@ static void LogSyncRelease(struct metapage * mp) { struct jfs_log *log = mp->log; - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); assert(log); - atomic_dec(&mp->nohomeok); - - if (atomic_read(&mp->nohomeok)) - return; - - hold_metapage(mp, 0); - - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del_init(&mp->synclist); - LOGSYNC_UNLOCK(log); - - release_metapage(mp); + metapage_homeok(mp); } /* diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index f31a9e3f3fe..5cf91785b54 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -49,7 +49,6 @@ */ int jfs_umount(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipbmap = sbi->ipbmap; struct inode *ipimap = sbi->ipimap; @@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their @@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb) if (log) { /* log = NULL if read-only mount */ updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - /* * close log: * @@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb) int jfs_umount_rw(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; @@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb) * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - return lmLogClose(sb); } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 11c58c54b81..2c1f311914a 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -111,8 +111,8 @@ static struct { /* * forward references */ -static int xtSearch(struct inode *ip, - s64 xoff, int *cmpp, struct btstack * btstack, int flag); +static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, + struct btstack * btstack, int flag); static int xtSplitUp(tid_t tid, struct inode *ip, @@ -159,11 +159,12 @@ int xtLookup(struct inode *ip, s64 lstart, xtpage_t *p; int index; xad_t *xad; - s64 size, xoff, xend; + s64 next, size, xoff, xend; int xlen; s64 xaddr; - *plen = 0; + *paddr = 0; + *plen = llen; if (!no_check) { /* is lookup offset beyond eof ? */ @@ -180,7 +181,7 @@ int xtLookup(struct inode *ip, s64 lstart, * search for the xad entry covering the logical extent */ //search: - if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { + if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { jfs_err("xtLookup: xtSearch returned %d", rc); return rc; } @@ -198,8 +199,11 @@ int xtLookup(struct inode *ip, s64 lstart, * lstart is a page start address, * i.e., lstart cannot start in a hole; */ - if (cmp) + if (cmp) { + if (next) + *plen = min(next - lstart, llen); goto out; + } /* * lxd covered by xad @@ -284,7 +288,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, if (lstart >= size) return 0; - if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) return rc; /* @@ -488,6 +492,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, * parameters: * ip - file object; * xoff - extent offset; + * nextp - address of next extent (if any) for search miss * cmpp - comparison result: * btstack - traverse stack; * flag - search process flag (XT_INSERT); @@ -497,7 +502,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, * *cmpp is set to result of comparison with the entry returned. * the page containing the entry is pinned at exit. */ -static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ +static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); @@ -511,6 +516,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ struct btframe *btsp; int nsplit = 0; /* number of pages to split */ s64 t64; + s64 next = 0; INCREMENT(xtStat.search); @@ -579,6 +585,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ * previous and this entry */ *cmpp = 1; + next = t64; goto out; } @@ -623,6 +630,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ /* update sequential access heuristics */ jfs_ip->btindex = index; + if (nextp) + *nextp = next; + INCREMENT(xtStat.fastSearch); return 0; } @@ -675,10 +685,11 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ return 0; } - /* search hit - internal page: * descend/search its child page */ + if (index < p->header.nextindex - 1) + next = offsetXAD(&p->xad[index + 1]); goto next; } @@ -694,6 +705,8 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. */ + if (base < p->header.nextindex) + next = offsetXAD(&p->xad[base]); /* * search miss - leaf page: * @@ -727,6 +740,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ jfs_ip->btorder = BT_RANDOM; jfs_ip->btindex = base; + if (nextp) + *nextp = next; + return 0; } @@ -793,6 +809,7 @@ int xtInsert(tid_t tid, /* transaction id */ struct xtsplit split; /* split information */ xad_t *xad; int cmp; + s64 next; struct tlock *tlck; struct xtlock *xtlck; @@ -806,7 +823,7 @@ int xtInsert(tid_t tid, /* transaction id */ * n.b. xtSearch() may return index of maxentry of * the full page. */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -814,7 +831,7 @@ int xtInsert(tid_t tid, /* transaction id */ /* This test must follow XT_GETSEARCH since mp must be valid if * we branch to out: */ - if (cmp == 0) { + if ((cmp == 0) || (next && (xlen > next - xoff))) { rc = -EEXIST; goto out; } @@ -1626,7 +1643,7 @@ int xtExtend(tid_t tid, /* transaction id */ jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* there must exist extent to be extended */ - if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -1794,7 +1811,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", */ /* there must exist extent to be tailgated */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -1977,7 +1994,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) nxlen = lengthXAD(nxad); nxaddr = addressXAD(nxad); - if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2291,7 +2308,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (nextindex == le16_to_cpu(p->header.maxentry)) { XT_PUTPAGE(mp); - if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2438,6 +2455,7 @@ int xtAppend(tid_t tid, /* transaction id */ int nsplit, nblocks, xlen; struct pxdlist pxdlist; pxd_t *pxd; + s64 next; xaddr = *xaddrp; xlen = *xlenp; @@ -2452,7 +2470,7 @@ int xtAppend(tid_t tid, /* transaction id */ * n.b. xtSearch() may return index of maxentry of * the full page. */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2462,6 +2480,9 @@ int xtAppend(tid_t tid, /* transaction id */ rc = -EEXIST; goto out; } + + if (next) + xlen = min(xlen, (int)(next - xoff)); //insert: /* * insert entry for new extent @@ -2600,7 +2621,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) /* * find the matching entry; xtSearch() pins the page */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) return rc; XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); @@ -2852,7 +2873,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ */ if (xtype == DATAEXT) { /* search in leaf entry */ - rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); if (rc) return rc; @@ -2958,7 +2979,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ } /* get back parent page */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) return rc; XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); @@ -3991,7 +4012,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) if (committed_size) { xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; - rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); if (rc) return rc; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 2eb6869b6e7..c6dc254d325 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ txQuiesce(sb); + /* Reset size of direct inode */ + sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + if (sbi->mntflag & JFS_INLINELOG) { /* * deactivate old inline log diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5856866e24f..5e774ed7fb6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -210,6 +210,10 @@ static void jfs_put_super(struct super_block *sb) unload_nls(sbi->nls_tab); sbi->nls_tab = NULL; + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; + kfree(sbi); } @@ -358,6 +362,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + /* + * Invalidate any previously read metadata. fsck may have + * changed the on-disk data since we mounted r/o + */ + truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); + JFS_SBI(sb)->flag = flag; return jfs_mount_rw(sb, 1); } @@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; + /* + * Initialize direct-mapping inode/address-space + */ + inode = new_inode(sb); + if (inode == NULL) + goto out_kfree; + inode->i_ino = 0; + inode->i_nlink = 1; + inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_mapping->a_ops = &jfs_metapage_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + sbi->direct_inode = inode; + rc = jfs_mount(sb); if (rc) { if (!silent) { jfs_err("jfs_mount failed w/return code = %d", rc); } - goto out_kfree; + goto out_mount_failed; } if (sb->s_flags & MS_RDONLY) sbi->log = NULL; @@ -482,6 +506,13 @@ out_no_rw: if (rc) { jfs_err("jfs_umount failed with return code %d", rc); } +out_mount_failed: + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + make_bad_inode(sbi->direct_inode); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; out_kfree: if (sbi->nls_tab) unload_nls(sbi->nls_tab); @@ -527,8 +558,10 @@ static int jfs_sync_fs(struct super_block *sb, int wait) struct jfs_log *log = JFS_SBI(sb)->log; /* log == NULL indicates read-only mount */ - if (log) + if (log) { jfs_flush_journal(log, wait); + jfs_syncpt(log); + } return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index e7d8d1a7760..32c7c8fcfce 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -160,52 +160,6 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) } while (page_bh != head); } -/** - * mpage_readpages - populate an address space with some pages, and - * start reads against them. - * - * @mapping: the address_space - * @pages: The address of a list_head which contains the target pages. These - * pages have their ->index populated and are otherwise uninitialised. - * - * The page at @pages->prev has the lowest file offset, and reads should be - * issued in @pages->prev to @pages->next order. - * - * @nr_pages: The number of pages at *@pages - * @get_block: The filesystem's block mapper function. - * - * This function walks the pages and the blocks within each page, building and - * emitting large BIOs. - * - * If anything unusual happens, such as: - * - * - encountering a page which has buffers - * - encountering a page which has a non-hole after a hole - * - encountering a page with non-contiguous blocks - * - * then this code just gives up and calls the buffer_head-based read function. - * It does handle a page which has holes at the end - that is a common case: - * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. - * - * BH_Boundary explanation: - * - * There is a problem. The mpage read code assembles several pages, gets all - * their disk mappings, and then submits them all. That's fine, but obtaining - * the disk mappings may require I/O. Reads of indirect blocks, for example. - * - * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be - * submitted in the following order: - * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 - * because the indirect block has to be read to get the mappings of blocks - * 13,14,15,16. Obviously, this impacts performance. - * - * So what we do it to allow the filesystem's get_block() function to set - * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block - * after this one will require I/O against a block which is probably close to - * this one. So you should push what I/O you have currently accumulated. - * - * This all causes the disk requests to be issued in the correct order. - */ static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, sector_t *last_block_in_bio, get_block_t get_block) @@ -320,6 +274,52 @@ confused: goto out; } +/** + * mpage_readpages - populate an address space with some pages, and + * start reads against them. + * + * @mapping: the address_space + * @pages: The address of a list_head which contains the target pages. These + * pages have their ->index populated and are otherwise uninitialised. + * + * The page at @pages->prev has the lowest file offset, and reads should be + * issued in @pages->prev to @pages->next order. + * + * @nr_pages: The number of pages at *@pages + * @get_block: The filesystem's block mapper function. + * + * This function walks the pages and the blocks within each page, building and + * emitting large BIOs. + * + * If anything unusual happens, such as: + * + * - encountering a page which has buffers + * - encountering a page which has a non-hole after a hole + * - encountering a page with non-contiguous blocks + * + * then this code just gives up and calls the buffer_head-based read function. + * It does handle a page which has holes at the end - that is a common case: + * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. + * + * BH_Boundary explanation: + * + * There is a problem. The mpage read code assembles several pages, gets all + * their disk mappings, and then submits them all. That's fine, but obtaining + * the disk mappings may require I/O. Reads of indirect blocks, for example. + * + * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be + * submitted in the following order: + * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 + * because the indirect block has to be read to get the mappings of blocks + * 13,14,15,16. Obviously, this impacts performance. + * + * So what we do it to allow the filesystem's get_block() function to set + * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block + * after this one will require I/O against a block which is probably close to + * this one. So you should push what I/O you have currently accumulated. + * + * This all causes the disk requests to be issued in the correct order. + */ int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block) @@ -727,6 +727,8 @@ retry: &last_block_in_bio, &ret, wbc, writepage_fn); } + if (unlikely(ret == WRITEPAGE_ACTIVATE)) + unlock_page(page); if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 57554bfbed7..e31903aadd9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1419,6 +1419,8 @@ static struct file_operations proc_tgid_attr_operations; static struct inode_operations proc_tgid_attr_inode_operations; #endif +static int get_tid_list(int index, unsigned int *tids, struct inode *dir); + /* SMP-safe */ static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, @@ -1458,7 +1460,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, */ switch(p->type) { case PROC_TGID_TASK: - inode->i_nlink = 3; + inode->i_nlink = 2 + get_tid_list(2, NULL, dir); inode->i_op = &proc_task_inode_operations; inode->i_fop = &proc_task_operations; break; @@ -1701,13 +1703,13 @@ static struct inode_operations proc_self_inode_operations = { }; /** - * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. + * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. * @p: task that should be flushed. * - * Drops the /proc/<pid> dcache entry from the hash chains. + * Drops the /proc/@pid dcache entry from the hash chains. * - * Dropping /proc/<pid> entries and detach_pid must be synchroneous, - * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, + * Dropping /proc/@pid entries and detach_pid must be synchroneous, + * otherwise e.g. /proc/@pid/exe might point to the wrong executable, * if the pid value is immediately reused. This is enforced by * - caller must acquire spin_lock(p->proc_lock) * - must be called before detach_pid() @@ -1739,8 +1741,8 @@ struct dentry *proc_pid_unhash(struct task_struct *p) } /** - * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries - * @proc_entry: directoy to prune. + * proc_pid_flush - recover memory used by stale /proc/@pid/x entries + * @proc_dentry: directoy to prune. * * Shrink the /proc directory that was used by the just killed thread. */ @@ -1800,8 +1802,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; - inode->i_nlink = 3; inode->i_flags|=S_IMMUTABLE; +#ifdef CONFIG_SECURITY + inode->i_nlink = 5; +#else + inode->i_nlink = 4; +#endif dentry->d_op = &pid_base_dentry_operations; @@ -1855,8 +1861,12 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; - inode->i_nlink = 3; inode->i_flags|=S_IMMUTABLE; +#ifdef CONFIG_SECURITY + inode->i_nlink = 4; +#else + inode->i_nlink = 3; +#endif dentry->d_op = &pid_base_dentry_operations; @@ -1935,7 +1945,8 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir) if (--index >= 0) continue; - tids[nr_tids] = tid; + if (tids != NULL) + tids[nr_tids] = tid; nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; @@ -2035,6 +2046,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi } nr_tids = get_tid_list(pos, tid_array, inode); + inode->i_nlink = pos + nr_tids; for (i = 0; i < nr_tids; i++) { unsigned long j = PROC_NUMBUF; diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index a4e2ed544bb..49c479c9454 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -260,8 +260,9 @@ static inline int block_group_used(struct super_block *s, u32 id) { /* * the packing is returned in disk byte order */ -u32 reiserfs_choose_packing(struct inode *dir) { - u32 packing; +__le32 reiserfs_choose_packing(struct inode *dir) +{ + __le32 packing; if (TEST_OPTION(packing_groups, dir->i_sb)) { u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); /* @@ -655,7 +656,7 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t *hint) struct buffer_head * bh; struct item_head * ih; int pos_in_item; - __u32 * item; + __le32 * item; int ret = 0; if (!hint->path) /* reiserfs code can call this function w/o pointer to path @@ -736,7 +737,7 @@ static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_bl #ifdef DISPLACE_NEW_PACKING_LOCALITIES static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) { - struct reiserfs_key * key = &hint->key; + struct in_core_key * key = &hint->key; hint->th->displace_new_blocks = 0; hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); @@ -777,7 +778,7 @@ static inline int old_way (reiserfs_blocknr_hint_t * hint) static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint) { - struct reiserfs_key * key = &hint->key; + struct in_core_key * key = &hint->key; b_blocknr_t slice_start; slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index d1514a9b051..fbde4b01a32 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -209,8 +209,8 @@ static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldi /* compose directory item containing "." and ".." entries (entries are not aligned to 4 byte boundary) */ /* the last four params are LE */ -void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, - __u32 par_dirid, __u32 par_objid) +void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid) { struct reiserfs_de_head * deh; @@ -242,8 +242,8 @@ void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, } /* compose directory item containing "." and ".." entries */ -void make_empty_dir_item (char * body, __u32 dirid, __u32 objid, - __u32 par_dirid, __u32 par_objid) +void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid) { struct reiserfs_de_head * deh; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 26950113af8..2230afff187 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -166,7 +166,7 @@ static int reiserfs_allocate_blocks_for_region( struct cpu_key key; // cpu key of item that we are going to deal with struct item_head *ih; // pointer to item head that we are going to deal with struct buffer_head *bh; // Buffer head that contains items that we are going to deal with - __u32 * item; // pointer to item we are going to deal with + __le32 * item; // pointer to item we are going to deal with INITIALIZE_PATH(path); // path to item, that we are going to deal with. b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. reiserfs_blocknr_hint_t hint; // hint structure for block allocator. @@ -891,7 +891,7 @@ static int reiserfs_prepare_file_region_for_write( struct item_head *ih = NULL; // pointer to item head that we are going to deal with struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with INITIALIZE_PATH(path); // path to item, that we are going to deal with. - __u32 * item=NULL; // pointer to item we are going to deal with + __le32 * item=NULL; // pointer to item we are going to deal with int item_pos=-1; /* Position in indirect item */ @@ -1284,10 +1284,11 @@ static ssize_t reiserfs_file_write( struct file *file, /* the file we are going reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); reiserfs_write_unlock(inode->i_sb); - if ( !num_pages ) { /* If we do not have enough space even for */ - res = -ENOSPC; /* single page, return -ENOSPC */ - if ( pos > (inode->i_size & (inode->i_sb->s_blocksize-1))) - break; // In case we are writing past the file end, break. + if ( !num_pages ) { /* If we do not have enough space even for a single page... */ + if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) { + res = -ENOSPC; + break; // In case we are writing past the end of the last file block, break. + } // Otherwise we are possibly overwriting the file, so // let's set write size to be equal or less than blocksize. // This way we get it correctly for file holes. diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 7543031396f..2711dff1b7b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -173,7 +173,7 @@ static inline void fix_tail_page_for_writing(struct page *page) { done already or non-hole position has been found in the indirect item */ static inline int allocation_needed (int retval, b_blocknr_t allocated, struct item_head * ih, - __u32 * item, int pos_in_item) + __le32 * item, int pos_in_item) { if (allocated) return 0; @@ -278,7 +278,7 @@ research: bh = get_last_bh (&path); ih = get_ih (&path); if (is_indirect_le_ih (ih)) { - __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); + __le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih); /* FIXME: here we could cache indirect item or part of it in the inode to avoid search_by_key in case of subsequent @@ -581,7 +581,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, struct cpu_key key; struct buffer_head * bh, * unbh = NULL; struct item_head * ih, tmp_ih; - __u32 * item; + __le32 * item; int done; int fs_gen; struct reiserfs_transaction_handle *th = NULL; @@ -746,7 +746,7 @@ start_trans: done = 0; do { if (is_statdata_le_ih (ih)) { - __u32 unp = 0; + __le32 unp = 0; struct cpu_key tmp_key; /* indirect item has to be inserted */ @@ -1341,8 +1341,8 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args key.version = KEY_FORMAT_3_5; key.on_disk_key.k_dir_id = dirino; key.on_disk_key.k_objectid = inode->i_ino; - key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET; - key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS; + key.on_disk_key.k_offset = 0; + key.on_disk_key.k_type = 0; /* look for the object's stat data */ retval = search_item (inode->i_sb, &key, &path_to_sd); @@ -2067,7 +2067,7 @@ static int map_block_for_writepage(struct inode *inode, struct item_head tmp_ih ; struct item_head *ih ; struct buffer_head *bh ; - __u32 *item ; + __le32 *item ; struct cpu_key key ; INITIALIZE_PATH(path) ; int pos_in_item ; diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index 9cf7c13b120..0ce33db1acd 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -296,10 +296,11 @@ static void print_sequence (__u32 start, int len) static void indirect_print_item (struct item_head * ih, char * item) { int j; - __u32 * unp, prev = INT_MAX; + __le32 * unp; + __u32 prev = INT_MAX; int num; - unp = (__u32 *)item; + unp = (__le32 *)item; if (ih_item_len(ih) % UNFM_P_SIZE) reiserfs_warning (NULL, "indirect_print_item: invalid item len"); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c9ad3a7849f..3072cfdee95 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2306,13 +2306,16 @@ static int journal_init_dev( struct super_block *super, if( !IS_ERR( journal -> j_dev_file ) ) { struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; if( !S_ISBLK( jdev_inode -> i_mode ) ) { - reiserfs_warning (super, "journal_init_dev: '%s' is " - "not a block device", jdev_name ); + reiserfs_warning(super, "journal_init_dev: '%s' is " + "not a block device", jdev_name ); result = -ENOTBLK; + release_journal_dev( super, journal ); } else { /* ok */ journal->j_dev_bd = I_BDEV(jdev_inode); set_blocksize(journal->j_dev_bd, super->s_blocksize); + reiserfs_info(super, "journal_init_dev: journal device: %s\n", + bdevname(journal->j_dev_bd, b)); } } else { result = PTR_ERR( journal -> j_dev_file ); @@ -2321,11 +2324,6 @@ static int journal_init_dev( struct super_block *super, "journal_init_dev: Cannot open '%s': %i", jdev_name, result ); } - if( result != 0 ) { - release_journal_dev( super, journal ); - } - reiserfs_info(super, "journal_init_dev: journal device: %s\n", - bdevname(journal->j_dev_bd, b)); return result; } @@ -2393,7 +2391,7 @@ int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_fo jh = (struct reiserfs_journal_header *)(bhjh->b_data); /* make sure that journal matches to the super block */ - if (is_reiserfs_jr(rs) && (jh->jh_journal.jp_journal_magic != sb_jp_journal_magic(rs))) { + if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) { reiserfs_warning (p_s_sb, "sh-460: journal header magic %x " "(device %s) does not match to magic found in super " "block %x", diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index 0785c43a748..bfe8e25ef29 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -11,13 +11,13 @@ // find where objectid map starts #define objectid_map(s,rs) (old_format_only (s) ? \ - (__u32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ - (__u32 *)((rs) + 1)) + (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ + (__le32 *)((rs) + 1)) #ifdef CONFIG_REISERFS_CHECK -static void check_objectid_map (struct super_block * s, __u32 * map) +static void check_objectid_map (struct super_block * s, __le32 * map) { if (le32_to_cpu (map[0]) != 1) reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx", @@ -27,7 +27,7 @@ static void check_objectid_map (struct super_block * s, __u32 * map) } #else -static void check_objectid_map (struct super_block * s, __u32 * map) +static void check_objectid_map (struct super_block * s, __le32 * map) {;} #endif @@ -52,7 +52,7 @@ __u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) { struct super_block * s = th->t_super; struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - __u32 * map = objectid_map (s, rs); + __le32 * map = objectid_map (s, rs); __u32 unused_objectid; BUG_ON (!th->t_trans_id); @@ -97,7 +97,7 @@ void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, { struct super_block * s = th->t_super; struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - __u32 * map = objectid_map (s, rs); + __le32 * map = objectid_map (s, rs); int i = 0; BUG_ON (!th->t_trans_id); @@ -172,12 +172,12 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s) { int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ; int old_max = sb_oid_maxsize(disk_sb); struct reiserfs_super_block_v1 *disk_sb_v1 ; - __u32 *objectid_map, *new_objectid_map ; + __le32 *objectid_map, *new_objectid_map ; int i ; disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); - objectid_map = (__u32 *)(disk_sb_v1 + 1) ; - new_objectid_map = (__u32 *)(disk_sb + 1) ; + objectid_map = (__le32 *)(disk_sb_v1 + 1) ; + new_objectid_map = (__le32 *)(disk_sb + 1) ; if (cur_size > new_size) { /* mark everyone used that was listed as free at the end of the objectid diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index f4ea81ae0e0..e242ebc7f6f 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -73,8 +73,8 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, #define DFL( x ) D4C( rs -> s_v1.x ) #define objectid_map( s, rs ) (old_format_only (s) ? \ - (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \ - (__u32 *)(rs + 1)) + (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \ + (__le32 *)(rs + 1)) #define MAP( i ) D4C( objectid_map( sb, rs )[ i ] ) #define DJF( x ) le32_to_cpu( rs -> x ) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 73ec5212178..da23ba75f3d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -87,22 +87,20 @@ inline void copy_item_head(struct item_head * p_v_to, inline int comp_short_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key) { - __u32 * p_s_le_u32, * p_s_cpu_u32; - int n_key_length = REISERFS_SHORT_KEY_LEN; - - p_s_le_u32 = (__u32 *)le_key; - p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key; - for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) { - if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 ) + __u32 n; + n = le32_to_cpu(le_key->k_dir_id); + if (n < cpu_key->on_disk_key.k_dir_id) return -1; - if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 ) + if (n > cpu_key->on_disk_key.k_dir_id) + return 1; + n = le32_to_cpu(le_key->k_objectid); + if (n < cpu_key->on_disk_key.k_objectid) + return -1; + if (n > cpu_key->on_disk_key.k_objectid) return 1; - } - return 0; } - /* k1 is pointer to on-disk structure which is stored in little-endian form. k2 is pointer to cpu variable. Compare keys using all 4 key fields. @@ -152,18 +150,15 @@ inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct re inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) { + int version; to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); // find out version of the key - to->version = le_key_version (from); - if (to->version == KEY_FORMAT_3_5) { - to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset); - to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness); - } else { - to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2); - to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2); - } + version = le_key_version (from); + to->version = version; + to->on_disk_key.k_offset = le_key_k_offset(version, from); + to->on_disk_key.k_type = le_key_k_type(version, from); } @@ -228,8 +223,14 @@ extern struct tree_balance * cur_tb; const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; /* Maximal possible key. It is never in the tree. */ -const struct reiserfs_key MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}}; +const struct reiserfs_key MAX_KEY = { + __constant_cpu_to_le32(0xffffffff), + __constant_cpu_to_le32(0xffffffff), + {{__constant_cpu_to_le32(0xffffffff), + __constant_cpu_to_le32(0xffffffff)},} +}; +const struct in_core_key MAX_IN_CORE_KEY = {~0U, ~0U, ~0ULL>>4, 15}; /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom of the path, and going upwards. We must check the path's validity at each step. If the key is not in @@ -997,7 +998,7 @@ static char prepare_for_delete_or_cut( int n_unfm_number, /* Number of the item unformatted nodes. */ n_counter, n_blk_size; - __u32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ + __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ __u32 tmp; struct item_head s_ih; /* Item header. */ char c_mode; /* Returned mode of the balance. */ @@ -1059,7 +1060,7 @@ static char prepare_for_delete_or_cut( /* pointers to be cut */ n_unfm_number -= pos_in_item (p_s_path); /* Set pointer to the last unformatted node pointer that is to be cut. */ - p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; + p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; /* We go through the unformatted nodes pointers of the indirect @@ -1081,8 +1082,8 @@ static char prepare_for_delete_or_cut( need_research = 1 ; break; } - RFALSE( p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) || - p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1, + RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) || + p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1, "vs-5265: pointer out of range"); /* Hole, nothing to remove. */ @@ -1431,7 +1432,7 @@ int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode #if defined( USE_INODE_GENERATION_COUNTER ) if( !old_format_only ( th -> t_super ) ) { - __u32 *inode_generation; + __le32 *inode_generation; inode_generation = &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index bcdf2438d15..31e75125f48 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -110,7 +110,7 @@ static void reiserfs_unlockfs(struct super_block *s) { reiserfs_allow_writes(s) ; } -extern const struct reiserfs_key MAX_KEY; +extern const struct in_core_key MAX_IN_CORE_KEY; /* this is used to delete "save link" when there are no items of a @@ -164,7 +164,7 @@ static int finish_unfinished (struct super_block * s) /* compose key to look for "save" links */ max_cpu_key.version = KEY_FORMAT_3_5; - max_cpu_key.on_disk_key = MAX_KEY; + max_cpu_key.on_disk_key = MAX_IN_CORE_KEY; max_cpu_key.key_length = 3; #ifdef CONFIG_QUOTA @@ -216,10 +216,10 @@ static int finish_unfinished (struct super_block * s) /* reiserfs_iget needs k_dirid and k_objectid only */ item = B_I_PITEM (bh, ih); - obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__u32 *)item); + obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item); obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); - obj_key.on_disk_key.u.k_offset_v1.k_offset = 0; - obj_key.on_disk_key.u.k_offset_v1.k_uniqueness = 0; + obj_key.on_disk_key.k_offset = 0; + obj_key.on_disk_key.k_type = 0; pathrelse (&path); @@ -304,7 +304,7 @@ void add_save_link (struct reiserfs_transaction_handle * th, int retval; struct cpu_key key; struct item_head ih; - __u32 link; + __le32 link; BUG_ON (!th->t_trans_id); @@ -889,12 +889,18 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st char * p; p = NULL; - /* "resize=NNN" */ - *blocks = simple_strtoul (arg, &p, 0); - if (*p != '\0') { - /* NNN does not look like a number */ - reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); - return 0; + /* "resize=NNN" or "resize=auto" */ + + if (!strcmp(arg, "auto")) { + /* From JFS code, to auto-get the size.*/ + *blocks = s->s_bdev->bd_inode->i_size >> s->s_blocksize_bits; + } else { + *blocks = simple_strtoul (arg, &p, 0); + if (*p != '\0') { + /* NNN does not look like a number */ + reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); + return 0; + } } } @@ -903,7 +909,8 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st unsigned long val = simple_strtoul (arg, &p, 0); /* commit=NNN (time in seconds) */ if ( *p != '\0' || val >= (unsigned int)-1) { - reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); return 0; + reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); + return 0; } *commit_max_age = (unsigned int)val; } @@ -1329,7 +1336,7 @@ static int read_super_block (struct super_block * s, int offset) return 1; } - if ( rs->s_v1.s_root_block == -1 ) { + if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) { brelse(bh) ; reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" diff --git a/fs/seq_file.c b/fs/seq_file.c index 650c43ba86c..38ef913767f 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -51,7 +51,10 @@ EXPORT_SYMBOL(seq_open); /** * seq_read - ->read() method for sequential files. - * @file, @buf, @size, @ppos: see file_operations method + * @file: the file to read from + * @buf: the buffer to read to + * @size: the maximum number of bytes to read + * @ppos: the current position in the file * * Ready-made ->f_op->read() */ @@ -219,7 +222,9 @@ Eoverflow: /** * seq_lseek - ->llseek() method for sequential files. - * @file, @offset, @origin: see file_operations method + * @file: the file in question + * @offset: new position + * @origin: 0 for absolute, 1 for relative position * * Ready-made ->f_op->llseek() */ diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index da25aeb0e06..364208071e1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -96,7 +96,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer /** * flush_read_buffer - push buffer to userspace. * @buffer: data buffer for file. - * @userbuf: user-passed buffer. + * @buf: user-passed buffer. * @count: number of bytes requested. * @ppos: file position. * @@ -164,7 +164,7 @@ out: /** * fill_write_buffer - copy buffer from userspace. * @buffer: data buffer for file. - * @userbuf: data from user. + * @buf: data from user. * @count: number of bytes in @userbuf. * * Allocate @buffer->page if it hasn't been already, then diff --git a/fs/udf/file.c b/fs/udf/file.c index 2faa4172b9f..bb40d63f328 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -49,8 +49,7 @@ static int udf_adinicb_readpage(struct file *file, struct page * page) struct inode *inode = page->mapping->host; char *kaddr; - if (!PageLocked(page)) - PAGE_BUG(page); + BUG_ON(!PageLocked(page)); kaddr = kmap(page); memset(kaddr, 0, PAGE_CACHE_SIZE); @@ -67,8 +66,7 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb struct inode *inode = page->mapping->host; char *kaddr; - if (!PageLocked(page)) - PAGE_BUG(page); + BUG_ON(!PageLocked(page)); kaddr = kmap(page); memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), kaddr, inode->i_size); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 0506e117378..3d68de39fad 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -167,8 +167,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) } page = grab_cache_page(inode->i_mapping, 0); - if (!PageLocked(page)) - PAGE_BUG(page); + BUG_ON(!PageLocked(page)); + if (!PageUptodate(page)) { kaddr = kmap(page); |