diff options
Diffstat (limited to 'fs')
46 files changed, 826 insertions, 393 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1aba036dcab..26e5f502662 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -376,7 +376,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) */ mutex_unlock(&bd_inode->i_mutex); - error = blkdev_issue_flush(bdev, NULL); + error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); if (error == -EOPNOTSUPP) error = 0; @@ -627,41 +627,209 @@ void bd_forget(struct inode *inode) iput(bdev->bd_inode); } -int bd_claim(struct block_device *bdev, void *holder) +/** + * bd_may_claim - test whether a block device can be claimed + * @bdev: block device of interest + * @whole: whole block device containing @bdev, may equal @bdev + * @holder: holder trying to claim @bdev + * + * Test whther @bdev can be claimed by @holder. + * + * CONTEXT: + * spin_lock(&bdev_lock). + * + * RETURNS: + * %true if @bdev can be claimed, %false otherwise. + */ +static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, + void *holder) { - int res; - spin_lock(&bdev_lock); - - /* first decide result */ if (bdev->bd_holder == holder) - res = 0; /* already a holder */ + return true; /* already a holder */ else if (bdev->bd_holder != NULL) - res = -EBUSY; /* held by someone else */ + return false; /* held by someone else */ else if (bdev->bd_contains == bdev) - res = 0; /* is a whole device which isn't held */ + return true; /* is a whole device which isn't held */ - else if (bdev->bd_contains->bd_holder == bd_claim) - res = 0; /* is a partition of a device that is being partitioned */ - else if (bdev->bd_contains->bd_holder != NULL) - res = -EBUSY; /* is a partition of a held device */ + else if (whole->bd_holder == bd_claim) + return true; /* is a partition of a device that is being partitioned */ + else if (whole->bd_holder != NULL) + return false; /* is a partition of a held device */ else - res = 0; /* is a partition of an un-held device */ + return true; /* is a partition of an un-held device */ +} + +/** + * bd_prepare_to_claim - prepare to claim a block device + * @bdev: block device of interest + * @whole: the whole device containing @bdev, may equal @bdev + * @holder: holder trying to claim @bdev + * + * Prepare to claim @bdev. This function fails if @bdev is already + * claimed by another holder and waits if another claiming is in + * progress. This function doesn't actually claim. On successful + * return, the caller has ownership of bd_claiming and bd_holder[s]. + * + * CONTEXT: + * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab + * it multiple times. + * + * RETURNS: + * 0 if @bdev can be claimed, -EBUSY otherwise. + */ +static int bd_prepare_to_claim(struct block_device *bdev, + struct block_device *whole, void *holder) +{ +retry: + /* if someone else claimed, fail */ + if (!bd_may_claim(bdev, whole, holder)) + return -EBUSY; + + /* if someone else is claiming, wait for it to finish */ + if (whole->bd_claiming && whole->bd_claiming != holder) { + wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); + DEFINE_WAIT(wait); + + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&bdev_lock); + schedule(); + finish_wait(wq, &wait); + spin_lock(&bdev_lock); + goto retry; + } + + /* yay, all mine */ + return 0; +} + +/** + * bd_start_claiming - start claiming a block device + * @bdev: block device of interest + * @holder: holder trying to claim @bdev + * + * @bdev is about to be opened exclusively. Check @bdev can be opened + * exclusively and mark that an exclusive open is in progress. Each + * successful call to this function must be matched with a call to + * either bd_claim() or bd_abort_claiming(). If this function + * succeeds, the matching bd_claim() is guaranteed to succeed. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to the block device containing @bdev on success, ERR_PTR() + * value on failure. + */ +static struct block_device *bd_start_claiming(struct block_device *bdev, + void *holder) +{ + struct gendisk *disk; + struct block_device *whole; + int partno, err; + + might_sleep(); + + /* + * @bdev might not have been initialized properly yet, look up + * and grab the outer block device the hard way. + */ + disk = get_gendisk(bdev->bd_dev, &partno); + if (!disk) + return ERR_PTR(-ENXIO); + + whole = bdget_disk(disk, 0); + put_disk(disk); + if (!whole) + return ERR_PTR(-ENOMEM); + + /* prepare to claim, if successful, mark claiming in progress */ + spin_lock(&bdev_lock); + + err = bd_prepare_to_claim(bdev, whole, holder); + if (err == 0) { + whole->bd_claiming = holder; + spin_unlock(&bdev_lock); + return whole; + } else { + spin_unlock(&bdev_lock); + bdput(whole); + return ERR_PTR(err); + } +} - /* now impose change */ - if (res==0) { +/* releases bdev_lock */ +static void __bd_abort_claiming(struct block_device *whole, void *holder) +{ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); + + spin_unlock(&bdev_lock); + bdput(whole); +} + +/** + * bd_abort_claiming - abort claiming a block device + * @whole: whole block device returned by bd_start_claiming() + * @holder: holder trying to claim @bdev + * + * Abort a claiming block started by bd_start_claiming(). Note that + * @whole is not the block device to be claimed but the whole device + * returned by bd_start_claiming(). + * + * CONTEXT: + * Grabs and releases bdev_lock. + */ +static void bd_abort_claiming(struct block_device *whole, void *holder) +{ + spin_lock(&bdev_lock); + __bd_abort_claiming(whole, holder); /* releases bdev_lock */ +} + +/** + * bd_claim - claim a block device + * @bdev: block device to claim + * @holder: holder trying to claim @bdev + * + * Try to claim @bdev which must have been opened successfully. This + * function may be called with or without preceding + * blk_start_claiming(). In the former case, this function is always + * successful and terminates the claiming block. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * 0 if successful, -EBUSY if @bdev is already claimed. + */ +int bd_claim(struct block_device *bdev, void *holder) +{ + struct block_device *whole = bdev->bd_contains; + int res; + + might_sleep(); + + spin_lock(&bdev_lock); + + res = bd_prepare_to_claim(bdev, whole, holder); + if (res == 0) { /* note that for a whole device bd_holders * will be incremented twice, and bd_holder will * be set to bd_claim before being set to holder */ - bdev->bd_contains->bd_holders ++; - bdev->bd_contains->bd_holder = bd_claim; + whole->bd_holders++; + whole->bd_holder = bd_claim; bdev->bd_holders++; bdev->bd_holder = holder; } - spin_unlock(&bdev_lock); + + if (whole->bd_claiming) + __bd_abort_claiming(whole, holder); /* releases bdev_lock */ + else + spin_unlock(&bdev_lock); + return res; } - EXPORT_SYMBOL(bd_claim); void bd_release(struct block_device *bdev) @@ -1275,6 +1443,7 @@ EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) { + struct block_device *whole = NULL; struct block_device *bdev; int res; @@ -1297,22 +1466,25 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (bdev == NULL) return -ENOMEM; + if (filp->f_mode & FMODE_EXCL) { + whole = bd_start_claiming(bdev, filp); + if (IS_ERR(whole)) { + bdput(bdev); + return PTR_ERR(whole); + } + } + filp->f_mapping = bdev->bd_inode->i_mapping; res = blkdev_get(bdev, filp->f_mode); - if (res) - return res; - if (filp->f_mode & FMODE_EXCL) { - res = bd_claim(bdev, filp); - if (res) - goto out_blkdev_put; + if (whole) { + if (res == 0) + BUG_ON(bd_claim(bdev, filp) != 0); + else + bd_abort_claiming(whole, filp); } - return 0; - - out_blkdev_put: - blkdev_put(bdev, filp->f_mode); return res; } @@ -1523,27 +1695,34 @@ EXPORT_SYMBOL(lookup_bdev); */ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { - struct block_device *bdev; - int error = 0; + struct block_device *bdev, *whole; + int error; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; + whole = bd_start_claiming(bdev, holder); + if (IS_ERR(whole)) { + bdput(bdev); + return whole; + } + error = blkdev_get(bdev, mode); if (error) - return ERR_PTR(error); + goto out_abort_claiming; + error = -EACCES; if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) - goto blkdev_put; - error = bd_claim(bdev, holder); - if (error) - goto blkdev_put; + goto out_blkdev_put; + BUG_ON(bd_claim(bdev, holder) != 0); return bdev; - -blkdev_put: + +out_blkdev_put: blkdev_put(bdev, mode); +out_abort_claiming: + bd_abort_claiming(whole, holder); return ERR_PTR(error); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b34d32fdaae..c6a4f459ad7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, - DISCARD_FL_BARRIER); + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/buffer.c b/fs/buffer.c index 2914d9adfb5..e8aa7081d25 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -275,6 +275,7 @@ void invalidate_bdev(struct block_device *bdev) return; invalidate_bh_lrus(); + lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 26289e8f416..fcf7487734b 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -90,6 +90,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) * storage */ if (needs_barrier) - blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); return ret; } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0d0c3239c1c..ef3d980e67c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) if (ext4_should_writeback_data(inode) && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, + NULL, BLKDEV_IFL_WAIT); jbd2_log_wait_commit(journal, commit_tid); } else if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); return ret; } diff --git a/fs/fcntl.c b/fs/fcntl.c index 0a140741b39..f74d270ba15 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -14,6 +14,7 @@ #include <linux/dnotify.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/pipe_fs_i.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/signal.h> @@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_NOTIFY: err = fcntl_dirnotify(fd, filp, arg); break; + case F_SETPIPE_SZ: + case F_GETPIPE_SZ: + err = pipe_fcntl(filp, cmd, arg); + break; default: break; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 24e85ce1189..5c4161f1fd9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,6 +45,7 @@ struct wb_writeback_args { unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; + unsigned int sb_pinned:1; }; /* @@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) } static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args) + struct wb_writeback_args *args, + int wait) { struct bdi_work *work; @@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, if (work) { bdi_work_init(work, args); bdi_queue_work(bdi, work); + if (wait) + bdi_wait_on_work_clear(work); } else { struct bdi_writeback *wb = &bdi->wb; @@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, + /* + * Setting sb_pinned is not necessary for WB_SYNC_ALL, but + * lets make it explicitly clear. + */ + .sb_pinned = 1, }; struct bdi_work work; @@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * @bdi: the backing device to write from * @sb: write inodes from this super_block * @nr_pages: the number of pages to write + * @sb_locked: caller already holds sb umount sem. * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only * started when this function returns, we make no guarentees on - * completion. Caller need not hold sb s_umount semaphore. + * completion. Caller specifies whether sb umount sem is held already or not. * */ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) + long nr_pages, int sb_locked) { struct wb_writeback_args args = { .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, + .sb_pinned = sb_locked, }; /* @@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, args.for_background = 1; } - bdi_alloc_queue_work(bdi, &args); + bdi_alloc_queue_work(bdi, &args, sb_locked); } /* @@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) BUG_ON(inode->i_state & I_SYNC); - /* Set I_SYNC, reset I_DIRTY */ - dirty = inode->i_state & I_DIRTY; + /* Set I_SYNC, reset I_DIRTY_PAGES */ inode->i_state |= I_SYNC; - inode->i_state &= ~I_DIRTY; - + inode->i_state &= ~I_DIRTY_PAGES; spin_unlock(&inode_lock); ret = do_writepages(mapping, wbc); @@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = err; } + /* + * Some filesystems may redirty the inode during the writeback + * due to delalloc, clear dirty metadata flags right before + * write_inode() + */ + spin_lock(&inode_lock); + dirty = inode->i_state & I_DIRTY; + inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); + spin_unlock(&inode_lock); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { int err = write_inode(inode, wbc); @@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, /* * Caller must already hold the ref for this */ - if (wbc->sync_mode == WB_SYNC_ALL) { + if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); return SB_NOT_PINNED; } @@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb, .for_kupdate = args->for_kupdate, .for_background = args->for_background, .range_cyclic = args->range_cyclic, + .sb_pinned = args->sb_pinned, }; unsigned long oldest_jif; long wrote = 0; @@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) unsigned long expired; long nr_pages; + /* + * When set to zero, disable periodic writeback + */ + if (!dirty_writeback_interval) + return 0; + expired = wb->last_old_flush + msecs_to_jiffies(dirty_writeback_interval * 10); if (time_before(jiffies, expired)) @@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) while ((work = get_next_work_item(bdi, wb)) != NULL) { struct wb_writeback_args args = work->args; + int post_clear; /* * Override sync mode, in case we must wait for completion @@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) if (force_wait) work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; + post_clear = WB_SYNC_ALL || args.sb_pinned; + /* * If this isn't a data integrity operation, just notify * that we have seen this work and we are now starting it. */ - if (args.sync_mode == WB_SYNC_NONE) + if (!post_clear) wb_clear_pending(wb, work); wrote += wb_writeback(wb, &args); @@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * This is a data integrity writeback, so only do the * notification when we have completed the work. */ - if (args.sync_mode == WB_SYNC_ALL) + if (post_clear) wb_clear_pending(wb, work); } @@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb) break; } - wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); - schedule_timeout_interruptible(wait_jiffies); + if (dirty_writeback_interval) { + wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); + schedule_timeout_interruptible(wait_jiffies); + } else { + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty_careful(&wb->bdi->work_list) && + !kthread_should_stop()) + schedule(); + __set_current_state(TASK_RUNNING); + } + try_to_freeze(); } @@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, &args); + bdi_alloc_queue_work(bdi, &args, 0); } rcu_read_unlock(); @@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } +static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) +{ + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); + long nr_to_write; + + nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + + bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); +} + /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb) */ void writeback_inodes_sb(struct super_block *sb) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; - - nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + __writeback_inodes_sb(sb, 0); } EXPORT_SYMBOL(writeback_inodes_sb); /** + * writeback_inodes_sb_locked - writeback dirty inodes from given super_block + * @sb: the superblock + * + * Like writeback_inodes_sb(), except the caller already holds the + * sb umount sem. + */ +void writeback_inodes_sb_locked(struct super_block *sb) +{ + __writeback_inodes_sb(sb, 1); +} + +/** * writeback_inodes_sb_if_idle - start writeback if none underway * @sb: the superblock * diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8bce73ed4d8..117fa4171f6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if ((start + nr_sects) != blk) { rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - DISCARD_FL_BARRIER); + BLKDEV_IFL_WAIT | + BLKDEV_IFL_BARRIER); if (rv) goto fail; nr_sects = 0; @@ -869,7 +870,7 @@ start_new_extent: } if (nr_sects) { rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - DISCARD_FL_BARRIER); + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (rv) goto fail; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 30beb11ef92..076d1cc44f9 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) */ if ((journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 671da7fb7ff..75716d3d2be 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -717,7 +717,8 @@ start_journal_io: if (commit_transaction->t_flushed_data_blocks && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, @@ -727,7 +728,8 @@ start_journal_io: if (err) __jbd2_journal_abort_hard(journal); if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_dev, NULL); + blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); } err = journal_finish_inode_data_buffers(journal, commit_transaction); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a756168a21c..8c1097327ab 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -674,7 +674,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS, - DISCARD_FL_BARRIER); + BLKDEV_IFL_BARRIER); if (ret < 0) return ret; nblocks = 0; @@ -684,7 +684,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, DISCARD_FL_BARRIER); + GFP_NOFS, BLKDEV_IFL_BARRIER); return ret; } diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index a97b477ac0f..6921e7890be 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c @@ -70,14 +70,14 @@ struct riscix_record { #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ defined(CONFIG_ACORN_PARTITION_ADFS) -static int -riscix_partition(struct parsed_partitions *state, struct block_device *bdev, - unsigned long first_sect, int slot, unsigned long nr_sects) +static int riscix_partition(struct parsed_partitions *state, + unsigned long first_sect, int slot, + unsigned long nr_sects) { Sector sect; struct riscix_record *rr; - rr = (struct riscix_record *)read_dev_sector(bdev, first_sect, §); + rr = read_part_sector(state, first_sect, §); if (!rr) return -1; @@ -123,9 +123,9 @@ struct linux_part { #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ defined(CONFIG_ACORN_PARTITION_ADFS) -static int -linux_partition(struct parsed_partitions *state, struct block_device *bdev, - unsigned long first_sect, int slot, unsigned long nr_sects) +static int linux_partition(struct parsed_partitions *state, + unsigned long first_sect, int slot, + unsigned long nr_sects) { Sector sect; struct linux_part *linuxp; @@ -135,7 +135,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, put_partition(state, slot++, first_sect, size); - linuxp = (struct linux_part *)read_dev_sector(bdev, first_sect, §); + linuxp = read_part_sector(state, first_sect, §); if (!linuxp) return -1; @@ -157,8 +157,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, #endif #ifdef CONFIG_ACORN_PARTITION_CUMANA -int -adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev) +int adfspart_check_CUMANA(struct parsed_partitions *state) { unsigned long first_sector = 0; unsigned int start_blk = 0; @@ -185,7 +184,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev struct adfs_discrecord *dr; unsigned int nr_sects; - data = read_dev_sector(bdev, start_blk * 2 + 6, §); + data = read_part_sector(state, start_blk * 2 + 6, §); if (!data) return -1; @@ -217,14 +216,14 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: /* RISCiX - we don't know how to find the next one. */ - slot = riscix_partition(state, bdev, first_sector, - slot, nr_sects); + slot = riscix_partition(state, first_sector, slot, + nr_sects); break; #endif case PARTITION_LINUX: - slot = linux_partition(state, bdev, first_sector, - slot, nr_sects); + slot = linux_partition(state, first_sector, slot, + nr_sects); break; } put_dev_sector(sect); @@ -249,8 +248,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev * hda1 = ADFS partition on first drive. * hda2 = non-ADFS partition. */ -int -adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) +int adfspart_check_ADFS(struct parsed_partitions *state) { unsigned long start_sect, nr_sects, sectscyl, heads; Sector sect; @@ -259,7 +257,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) unsigned char id; int slot = 1; - data = read_dev_sector(bdev, 6, §); + data = read_part_sector(state, 6, §); if (!data) return -1; @@ -278,21 +276,21 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) /* * Work out start of non-adfs partition. */ - nr_sects = (bdev->bd_inode->i_size >> 9) - start_sect; + nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect; if (start_sect) { switch (id) { #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: case PARTITION_RISCIX_MFM: - slot = riscix_partition(state, bdev, start_sect, - slot, nr_sects); + slot = riscix_partition(state, start_sect, slot, + nr_sects); break; #endif case PARTITION_LINUX: - slot = linux_partition(state, bdev, start_sect, - slot, nr_sects); + slot = linux_partition(state, start_sect, slot, + nr_sects); break; } } @@ -308,10 +306,11 @@ struct ics_part { __le32 size; }; -static int adfspart_check_ICSLinux(struct block_device *bdev, unsigned long block) +static int adfspart_check_ICSLinux(struct parsed_partitions *state, + unsigned long block) { Sector sect; - unsigned char *data = read_dev_sector(bdev, block, §); + unsigned char *data = read_part_sector(state, block, §); int result = 0; if (data) { @@ -349,8 +348,7 @@ static inline int valid_ics_sector(const unsigned char *data) * hda2 = ADFS partition 1 on first drive. * ..etc.. */ -int -adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) +int adfspart_check_ICS(struct parsed_partitions *state) { const unsigned char *data; const struct ics_part *p; @@ -360,7 +358,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) /* * Try ICS style partitions - sector 0 contains partition info. */ - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; @@ -392,7 +390,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) * partition is. We must not make this visible * to the filesystem. */ - if (size > 1 && adfspart_check_ICSLinux(bdev, start)) { + if (size > 1 && adfspart_check_ICSLinux(state, start)) { start += 1; size -= 1; } @@ -446,8 +444,7 @@ static inline int valid_ptec_sector(const unsigned char *data) * hda2 = ADFS partition 1 on first drive. * ..etc.. */ -int -adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev) +int adfspart_check_POWERTEC(struct parsed_partitions *state) { Sector sect; const unsigned char *data; @@ -455,7 +452,7 @@ adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bd int slot = 1; int i; - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; @@ -508,8 +505,7 @@ static const char eesox_name[] = { * 1. The individual ADFS boot block entries that are placed on the disk. * 2. The start address of the next entry. */ -int -adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) +int adfspart_check_EESOX(struct parsed_partitions *state) { Sector sect; const unsigned char *data; @@ -518,7 +514,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) sector_t start = 0; int i, slot = 1; - data = read_dev_sector(bdev, 7, §); + data = read_part_sector(state, 7, §); if (!data) return -1; @@ -545,7 +541,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) if (i != 0) { sector_t size; - size = get_capacity(bdev->bd_disk); + size = get_capacity(state->bdev->bd_disk); put_partition(state, slot++, start, size - start); printk("\n"); } diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h index 81fd50ecc08..ede82852969 100644 --- a/fs/partitions/acorn.h +++ b/fs/partitions/acorn.h @@ -7,8 +7,8 @@ * format, and everyone stick to it? */ -int adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev); -int adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev); -int adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev); -int adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev); -int adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev); +int adfspart_check_CUMANA(struct parsed_partitions *state); +int adfspart_check_ADFS(struct parsed_partitions *state); +int adfspart_check_ICS(struct parsed_partitions *state); +int adfspart_check_POWERTEC(struct parsed_partitions *state); +int adfspart_check_EESOX(struct parsed_partitions *state); diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index 9917a8c360f..ba443d4229f 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c @@ -23,8 +23,7 @@ checksum_block(__be32 *m, int size) return sum; } -int -amiga_partition(struct parsed_partitions *state, struct block_device *bdev) +int amiga_partition(struct parsed_partitions *state) { Sector sect; unsigned char *data; @@ -38,11 +37,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) for (blk = 0; ; blk++, put_dev_sector(sect)) { if (blk == RDB_ALLOCATION_LIMIT) goto rdb_done; - data = read_dev_sector(bdev, blk, §); + data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) printk("Dev %s: unable to read RDB block %d\n", - bdevname(bdev, b), blk); + bdevname(state->bdev, b), blk); res = -1; goto rdb_done; } @@ -64,7 +63,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) } printk("Dev %s: RDB in block %d has bad checksum\n", - bdevname(bdev, b), blk); + bdevname(state->bdev, b), blk); } /* blksize is blocks per 512 byte standard block */ @@ -75,11 +74,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) put_dev_sector(sect); for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { blk *= blksize; /* Read in terms partition table understands */ - data = read_dev_sector(bdev, blk, §); + data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) printk("Dev %s: unable to read partition block %d\n", - bdevname(bdev, b), blk); + bdevname(state->bdev, b), blk); res = -1; goto rdb_done; } diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h index 2f3e9ce22d5..d094585cada 100644 --- a/fs/partitions/amiga.h +++ b/fs/partitions/amiga.h @@ -2,5 +2,5 @@ * fs/partitions/amiga.h */ -int amiga_partition(struct parsed_partitions *state, struct block_device *bdev); +int amiga_partition(struct parsed_partitions *state); diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 1f3572d5b75..4439ff1b6ce 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c @@ -30,7 +30,7 @@ static inline int OK_id(char *s) memcmp (s, "RAW", 3) == 0 ; } -int atari_partition(struct parsed_partitions *state, struct block_device *bdev) +int atari_partition(struct parsed_partitions *state) { Sector sect; struct rootsector *rs; @@ -42,12 +42,12 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ #endif - rs = (struct rootsector *) read_dev_sector(bdev, 0, §); + rs = read_part_sector(state, 0, §); if (!rs) return -1; /* Verify this is an Atari rootsector: */ - hd_size = bdev->bd_inode->i_size >> 9; + hd_size = state->bdev->bd_inode->i_size >> 9; if (!VALID_PARTITION(&rs->part[0], hd_size) && !VALID_PARTITION(&rs->part[1], hd_size) && !VALID_PARTITION(&rs->part[2], hd_size) && @@ -84,7 +84,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) printk(" XGM<"); partsect = extensect = be32_to_cpu(pi->st); while (1) { - xrs = (struct rootsector *)read_dev_sector(bdev, partsect, §2); + xrs = read_part_sector(state, partsect, §2); if (!xrs) { printk (" block %ld read failed\n", partsect); put_dev_sector(sect); diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h index 63186b00e13..fe2d32a89f3 100644 --- a/fs/partitions/atari.h +++ b/fs/partitions/atari.h @@ -31,4 +31,4 @@ struct rootsector u16 checksum; /* checksum for bootable disks */ } __attribute__((__packed__)); -int atari_partition(struct parsed_partitions *state, struct block_device *bdev); +int atari_partition(struct parsed_partitions *state); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e238ab23a9e..5dcd4b0c553 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -45,7 +45,7 @@ extern void md_autodetect_dev(dev_t dev); int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ -static int (*check_part[])(struct parsed_partitions *, struct block_device *) = { +static int (*check_part[])(struct parsed_partitions *) = { /* * Probe partition formats with tables at disk address 0 * that also have an ADFS boot block at 0xdc0. @@ -161,10 +161,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev) struct parsed_partitions *state; int i, res, err; - state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL); + state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); if (!state) return NULL; + state->bdev = bdev; disk_name(hd, 0, state->name); printk(KERN_INFO " %s:", state->name); if (isdigit(state->name[strlen(state->name)-1])) @@ -174,7 +175,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); - res = check_part[i++](state, bdev); + res = check_part[i++](state); if (res < 0) { /* We have hit an I/O error which we don't report now. * But record it, and let the others do their job. @@ -186,6 +187,8 @@ check_partition(struct gendisk *hd, struct block_device *bdev) } if (res > 0) return state; + if (state->access_beyond_eod) + err = -ENOSPC; if (err) /* The partition is unrecognized. So report I/O errors if there were any */ res = err; @@ -538,12 +541,33 @@ exit: disk_part_iter_exit(&piter); } +static bool disk_unlock_native_capacity(struct gendisk *disk) +{ + const struct block_device_operations *bdops = disk->fops; + + if (bdops->unlock_native_capacity && + !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { + printk(KERN_CONT "enabling native capacity\n"); + bdops->unlock_native_capacity(disk); + disk->flags |= GENHD_FL_NATIVE_CAPACITY; + return true; + } else { + printk(KERN_CONT "truncated\n"); + return false; + } +} + int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { + struct parsed_partitions *state = NULL; struct disk_part_iter piter; struct hd_struct *part; - struct parsed_partitions *state; int p, highest, res; +rescan: + if (state && !IS_ERR(state)) { + kfree(state); + state = NULL; + } if (bdev->bd_part_count) return -EBUSY; @@ -562,8 +586,32 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; - if (IS_ERR(state)) /* I/O error reading the partition table */ + if (IS_ERR(state)) { + /* + * I/O error reading the partition table. If any + * partition code tried to read beyond EOD, retry + * after unlocking native capacity. + */ + if (PTR_ERR(state) == -ENOSPC) { + printk(KERN_WARNING "%s: partition table beyond EOD, ", + disk->disk_name); + if (disk_unlock_native_capacity(disk)) + goto rescan; + } return -EIO; + } + /* + * If any partition code tried to read beyond EOD, try + * unlocking native capacity even if partition table is + * sucessfully read as we could be missing some partitions. + */ + if (state->access_beyond_eod) { + printk(KERN_WARNING + "%s: partition table partially beyond EOD, ", + disk->disk_name); + if (disk_unlock_native_capacity(disk)) + goto rescan; + } /* tell userspace that the media / partition table may have changed */ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); @@ -581,7 +629,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size, from; -try_scan: + size = state->parts[p].size; if (!size) continue; @@ -589,30 +637,21 @@ try_scan: from = state->parts[p].from; if (from >= get_capacity(disk)) { printk(KERN_WARNING - "%s: p%d ignored, start %llu is behind the end of the disk\n", + "%s: p%d start %llu is beyond EOD, ", disk->disk_name, p, (unsigned long long) from); + if (disk_unlock_native_capacity(disk)) + goto rescan; continue; } if (from + size > get_capacity(disk)) { - const struct block_device_operations *bdops = disk->fops; - unsigned long long capacity; - printk(KERN_WARNING - "%s: p%d size %llu exceeds device capacity, ", + "%s: p%d size %llu extends beyond EOD, ", disk->disk_name, p, (unsigned long long) size); - if (bdops->set_capacity && - (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { - printk(KERN_CONT "enabling native capacity\n"); - capacity = bdops->set_capacity(disk, ~0ULL); - disk->flags |= GENHD_FL_NATIVE_CAPACITY; - if (capacity > get_capacity(disk)) { - set_capacity(disk, capacity); - check_disk_size_change(disk, bdev); - bdev->bd_invalidated = 0; - } - goto try_scan; + if (disk_unlock_native_capacity(disk)) { + /* free state and restart */ + goto rescan; } else { /* * we can not ignore partitions of broken tables @@ -620,7 +659,6 @@ try_scan: * we limit them to the end of the disk to avoid * creating invalid block devices */ - printk(KERN_CONT "limited to end of disk\n"); size = get_capacity(disk) - from; } } diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 98dbe1a8452..52f8bd39939 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -6,6 +6,7 @@ * description. */ struct parsed_partitions { + struct block_device *bdev; char name[BDEVNAME_SIZE]; struct { sector_t from; @@ -14,8 +15,19 @@ struct parsed_partitions { } parts[DISK_MAX_PARTS]; int next; int limit; + bool access_beyond_eod; }; +static inline void *read_part_sector(struct parsed_partitions *state, + sector_t n, Sector *p) +{ + if (n >= get_capacity(state->bdev->bd_disk)) { + state->access_beyond_eod = true; + return NULL; + } + return read_dev_sector(state->bdev, n, p); +} + static inline void put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) { diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index f5867f8ed21..9efb2cfe241 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -140,8 +140,7 @@ efi_crc32(const void *buf, unsigned long len) * the part[0] entry for this disk, and is the number of * physical sectors available on the disk. */ -static u64 -last_lba(struct block_device *bdev) +static u64 last_lba(struct block_device *bdev) { if (!bdev || !bdev->bd_inode) return 0; @@ -181,27 +180,28 @@ is_pmbr_valid(legacy_mbr *mbr) /** * read_lba(): Read bytes from disk, starting at given LBA - * @bdev + * @state * @lba * @buffer * @size_t * - * Description: Reads @count bytes from @bdev into @buffer. + * Description: Reads @count bytes from @state->bdev into @buffer. * Returns number of bytes read on success, 0 on error. */ -static size_t -read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) +static size_t read_lba(struct parsed_partitions *state, + u64 lba, u8 *buffer, size_t count) { size_t totalreadcount = 0; + struct block_device *bdev = state->bdev; sector_t n = lba * (bdev_logical_block_size(bdev) / 512); - if (!bdev || !buffer || lba > last_lba(bdev)) + if (!buffer || lba > last_lba(bdev)) return 0; while (count) { int copied = 512; Sector sect; - unsigned char *data = read_dev_sector(bdev, n++, §); + unsigned char *data = read_part_sector(state, n++, §); if (!data) break; if (copied > count) @@ -217,19 +217,20 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) /** * alloc_read_gpt_entries(): reads partition entries from disk - * @bdev + * @state * @gpt - GPT header * * Description: Returns ptes on success, NULL on error. * Allocates space for PTEs based on information found in @gpt. * Notes: remember to free pte when you're done! */ -static gpt_entry * -alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) +static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, + gpt_header *gpt) { size_t count; gpt_entry *pte; - if (!bdev || !gpt) + + if (!gpt) return NULL; count = le32_to_cpu(gpt->num_partition_entries) * @@ -240,7 +241,7 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) if (!pte) return NULL; - if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba), + if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), (u8 *) pte, count) < count) { kfree(pte); @@ -252,27 +253,24 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) /** * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk - * @bdev + * @state * @lba is the Logical Block Address of the partition table * * Description: returns GPT header on success, NULL on error. Allocates - * and fills a GPT header starting at @ from @bdev. + * and fills a GPT header starting at @ from @state->bdev. * Note: remember to free gpt when finished with it. */ -static gpt_header * -alloc_read_gpt_header(struct block_device *bdev, u64 lba) +static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, + u64 lba) { gpt_header *gpt; - unsigned ssz = bdev_logical_block_size(bdev); - - if (!bdev) - return NULL; + unsigned ssz = bdev_logical_block_size(state->bdev); gpt = kzalloc(ssz, GFP_KERNEL); if (!gpt) return NULL; - if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) { + if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) { kfree(gpt); gpt=NULL; return NULL; @@ -283,7 +281,7 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) /** * is_gpt_valid() - tests one GPT header and PTEs for validity - * @bdev + * @state * @lba is the logical block address of the GPT header to test * @gpt is a GPT header ptr, filled on return. * @ptes is a PTEs ptr, filled on return. @@ -291,16 +289,15 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) * Description: returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. */ -static int -is_gpt_valid(struct block_device *bdev, u64 lba, - gpt_header **gpt, gpt_entry **ptes) +static int is_gpt_valid(struct parsed_partitions *state, u64 lba, + gpt_header **gpt, gpt_entry **ptes) { u32 crc, origcrc; u64 lastlba; - if (!bdev || !gpt || !ptes) + if (!ptes) return 0; - if (!(*gpt = alloc_read_gpt_header(bdev, lba))) + if (!(*gpt = alloc_read_gpt_header(state, lba))) return 0; /* Check the GUID Partition Table signature */ @@ -336,7 +333,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, /* Check the first_usable_lba and last_usable_lba are * within the disk. */ - lastlba = last_lba(bdev); + lastlba = last_lba(state->bdev); if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), @@ -350,7 +347,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, goto fail; } - if (!(*ptes = alloc_read_gpt_entries(bdev, *gpt))) + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) goto fail; /* Check the GUID Partition Entry Array CRC */ @@ -495,7 +492,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) /** * find_valid_gpt() - Search disk for valid GPT headers and PTEs - * @bdev + * @state * @gpt is a GPT header ptr, filled on return. * @ptes is a PTEs ptr, filled on return. * Description: Returns 1 if valid, 0 on error. @@ -508,24 +505,25 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) * This protects against devices which misreport their size, and forces * the user to decide to use the Alternate GPT. */ -static int -find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) +static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, + gpt_entry **ptes) { int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; gpt_header *pgpt = NULL, *agpt = NULL; gpt_entry *pptes = NULL, *aptes = NULL; legacy_mbr *legacymbr; u64 lastlba; - if (!bdev || !gpt || !ptes) + + if (!ptes) return 0; - lastlba = last_lba(bdev); + lastlba = last_lba(state->bdev); if (!force_gpt) { /* This will be added to the EFI Spec. per Intel after v1.02. */ legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); if (legacymbr) { - read_lba(bdev, 0, (u8 *) legacymbr, - sizeof (*legacymbr)); + read_lba(state, 0, (u8 *) legacymbr, + sizeof (*legacymbr)); good_pmbr = is_pmbr_valid(legacymbr); kfree(legacymbr); } @@ -533,15 +531,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) goto fail; } - good_pgpt = is_gpt_valid(bdev, GPT_PRIMARY_PARTITION_TABLE_LBA, + good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, &pgpt, &pptes); if (good_pgpt) - good_agpt = is_gpt_valid(bdev, + good_agpt = is_gpt_valid(state, le64_to_cpu(pgpt->alternate_lba), &agpt, &aptes); if (!good_agpt && force_gpt) - good_agpt = is_gpt_valid(bdev, lastlba, - &agpt, &aptes); + good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes); /* The obviously unsuccessful case */ if (!good_pgpt && !good_agpt) @@ -583,9 +580,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) } /** - * efi_partition(struct parsed_partitions *state, struct block_device *bdev) + * efi_partition(struct parsed_partitions *state) * @state - * @bdev * * Description: called from check.c, if the disk contains GPT * partitions, sets up partition entries in the kernel. @@ -602,15 +598,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) * 1 if successful * */ -int -efi_partition(struct parsed_partitions *state, struct block_device *bdev) +int efi_partition(struct parsed_partitions *state) { gpt_header *gpt = NULL; gpt_entry *ptes = NULL; u32 i; - unsigned ssz = bdev_logical_block_size(bdev) / 512; + unsigned ssz = bdev_logical_block_size(state->bdev) / 512; - if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { + if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { kfree(gpt); kfree(ptes); return 0; @@ -623,7 +618,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) u64 size = le64_to_cpu(ptes[i].ending_lba) - le64_to_cpu(ptes[i].starting_lba) + 1ULL; - if (!is_pte_valid(&ptes[i], last_lba(bdev))) + if (!is_pte_valid(&ptes[i], last_lba(state->bdev))) continue; put_partition(state, i+1, start * ssz, size * ssz); diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h index 6998b589abf..b69ab729558 100644 --- a/fs/partitions/efi.h +++ b/fs/partitions/efi.h @@ -110,7 +110,7 @@ typedef struct _legacy_mbr { } __attribute__ ((packed)) legacy_mbr; /* Functions */ -extern int efi_partition(struct parsed_partitions *state, struct block_device *bdev); +extern int efi_partition(struct parsed_partitions *state); #endif diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc71aab0846..3e73de5967f 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -58,9 +58,9 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { /* */ -int -ibm_partition(struct parsed_partitions *state, struct block_device *bdev) +int ibm_partition(struct parsed_partitions *state) { + struct block_device *bdev = state->bdev; int blocksize, res; loff_t i_size, offset, size, fmt_size; dasd_information2_t *info; @@ -100,7 +100,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* * Get volume label, extract name and type. */ - data = read_dev_sector(bdev, info->label_block*(blocksize/512), §); + data = read_part_sector(state, info->label_block*(blocksize/512), + §); if (data == NULL) goto out_readerr; @@ -193,8 +194,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) */ blk = cchhb2blk(&label->vol.vtoc, geo) + 1; counter = 0; - data = read_dev_sector(bdev, blk * (blocksize/512), - §); + data = read_part_sector(state, blk * (blocksize/512), + §); while (data != NULL) { struct vtoc_format1_label f1; @@ -208,9 +209,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) || f1.DS1FMTID == _ascebc['7'] || f1.DS1FMTID == _ascebc['9']) { blk++; - data = read_dev_sector(bdev, blk * - (blocksize/512), - §); + data = read_part_sector(state, + blk * (blocksize/512), §); continue; } @@ -230,9 +230,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) size * (blocksize >> 9)); counter++; blk++; - data = read_dev_sector(bdev, - blk * (blocksize/512), - §); + data = read_part_sector(state, + blk * (blocksize/512), §); } if (!data) diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h index 31f85a6ac45..08fb0804a81 100644 --- a/fs/partitions/ibm.h +++ b/fs/partitions/ibm.h @@ -1 +1 @@ -int ibm_partition(struct parsed_partitions *, struct block_device *); +int ibm_partition(struct parsed_partitions *); diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 176d89bcf12..1cc928bb762 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c @@ -9,7 +9,7 @@ #include "check.h" #include "karma.h" -int karma_partition(struct parsed_partitions *state, struct block_device *bdev) +int karma_partition(struct parsed_partitions *state) { int i; int slot = 1; @@ -29,7 +29,7 @@ int karma_partition(struct parsed_partitions *state, struct block_device *bdev) } __attribute__((packed)) *label; struct d_partition *p; - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h index ecf7d3f2a3d..c764b2e9df2 100644 --- a/fs/partitions/karma.h +++ b/fs/partitions/karma.h @@ -4,5 +4,5 @@ #define KARMA_LABEL_MAGIC 0xAB56 -int karma_partition(struct parsed_partitions *state, struct block_device *bdev); +int karma_partition(struct parsed_partitions *state); diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 8652fb99e96..3ceca05b668 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -309,7 +309,7 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, /** * ldm_validate_privheads - Compare the primary privhead with its backups - * @bdev: Device holding the LDM Database + * @state: Partition check state including device holding the LDM Database * @ph1: Memory struct to fill with ph contents * * Read and compare all three privheads from disk. @@ -321,8 +321,8 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, * Return: 'true' Success * 'false' Error */ -static bool ldm_validate_privheads (struct block_device *bdev, - struct privhead *ph1) +static bool ldm_validate_privheads(struct parsed_partitions *state, + struct privhead *ph1) { static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; struct privhead *ph[3] = { ph1 }; @@ -332,7 +332,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, long num_sects; int i; - BUG_ON (!bdev || !ph1); + BUG_ON (!state || !ph1); ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); @@ -346,8 +346,8 @@ static bool ldm_validate_privheads (struct block_device *bdev, /* Read and parse privheads */ for (i = 0; i < 3; i++) { - data = read_dev_sector (bdev, - ph[0]->config_start + off[i], §); + data = read_part_sector(state, ph[0]->config_start + off[i], + §); if (!data) { ldm_crit ("Disk read failed."); goto out; @@ -363,7 +363,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, } } - num_sects = bdev->bd_inode->i_size >> 9; + num_sects = state->bdev->bd_inode->i_size >> 9; if ((ph[0]->config_start > num_sects) || ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { @@ -397,20 +397,20 @@ out: /** * ldm_validate_tocblocks - Validate the table of contents and its backups - * @bdev: Device holding the LDM Database - * @base: Offset, into @bdev, of the database + * @state: Partition check state including device holding the LDM Database + * @base: Offset, into @state->bdev, of the database * @ldb: Cache of the database structures * * Find and compare the four tables of contents of the LDM Database stored on - * @bdev and return the parsed information into @toc1. + * @state->bdev and return the parsed information into @toc1. * * The offsets and sizes of the configs are range-checked against a privhead. * * Return: 'true' @toc1 contains validated TOCBLOCK info * 'false' @toc1 contents are undefined */ -static bool ldm_validate_tocblocks(struct block_device *bdev, - unsigned long base, struct ldmdb *ldb) +static bool ldm_validate_tocblocks(struct parsed_partitions *state, + unsigned long base, struct ldmdb *ldb) { static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; struct tocblock *tb[4]; @@ -420,7 +420,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, int i, nr_tbs; bool result = false; - BUG_ON(!bdev || !ldb); + BUG_ON(!state || !ldb); ph = &ldb->ph; tb[0] = &ldb->toc; tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); @@ -437,7 +437,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, * skip any that fail as long as we get at least one valid TOCBLOCK. */ for (nr_tbs = i = 0; i < 4; i++) { - data = read_dev_sector(bdev, base + off[i], §); + data = read_part_sector(state, base + off[i], §); if (!data) { ldm_error("Disk read failed for TOCBLOCK %d.", i); continue; @@ -473,7 +473,7 @@ err: /** * ldm_validate_vmdb - Read the VMDB and validate it - * @bdev: Device holding the LDM Database + * @state: Partition check state including device holding the LDM Database * @base: Offset, into @bdev, of the database * @ldb: Cache of the database structures * @@ -483,8 +483,8 @@ err: * Return: 'true' @ldb contains validated VBDB info * 'false' @ldb contents are undefined */ -static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, - struct ldmdb *ldb) +static bool ldm_validate_vmdb(struct parsed_partitions *state, + unsigned long base, struct ldmdb *ldb) { Sector sect; u8 *data; @@ -492,12 +492,12 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, struct vmdb *vm; struct tocblock *toc; - BUG_ON (!bdev || !ldb); + BUG_ON (!state || !ldb); vm = &ldb->vm; toc = &ldb->toc; - data = read_dev_sector (bdev, base + OFF_VMDB, §); + data = read_part_sector(state, base + OFF_VMDB, §); if (!data) { ldm_crit ("Disk read failed."); return false; @@ -534,21 +534,21 @@ out: /** * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk - * @bdev: Device holding the LDM Database + * @state: Partition check state including device holding the LDM Database * * This function provides a weak test to decide whether the device is a dynamic * disk or not. It looks for an MS-DOS-style partition table containing at * least one partition of type 0x42 (formerly SFS, now used by Windows for * dynamic disks). * - * N.B. The only possible error can come from the read_dev_sector and that is + * N.B. The only possible error can come from the read_part_sector and that is * only likely to happen if the underlying device is strange. If that IS * the case we should return zero to let someone else try. * - * Return: 'true' @bdev is a dynamic disk - * 'false' @bdev is not a dynamic disk, or an error occurred + * Return: 'true' @state->bdev is a dynamic disk + * 'false' @state->bdev is not a dynamic disk, or an error occurred */ -static bool ldm_validate_partition_table (struct block_device *bdev) +static bool ldm_validate_partition_table(struct parsed_partitions *state) { Sector sect; u8 *data; @@ -556,9 +556,9 @@ static bool ldm_validate_partition_table (struct block_device *bdev) int i; bool result = false; - BUG_ON (!bdev); + BUG_ON(!state); - data = read_dev_sector (bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) { ldm_crit ("Disk read failed."); return false; @@ -1391,8 +1391,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) /** * ldm_get_vblks - Read the on-disk database of VBLKs into memory - * @bdev: Device holding the LDM Database - * @base: Offset, into @bdev, of the database + * @state: Partition check state including device holding the LDM Database + * @base: Offset, into @state->bdev, of the database * @ldb: Cache of the database structures * * To use the information from the VBLKs, they need to be read from the disk, @@ -1401,8 +1401,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) * Return: 'true' All the VBLKs were read successfully * 'false' An error occurred */ -static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, - struct ldmdb *ldb) +static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base, + struct ldmdb *ldb) { int size, perbuf, skip, finish, s, v, recs; u8 *data = NULL; @@ -1410,7 +1410,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, bool result = false; LIST_HEAD (frags); - BUG_ON (!bdev || !ldb); + BUG_ON(!state || !ldb); size = ldb->vm.vblk_size; perbuf = 512 / size; @@ -1418,7 +1418,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, finish = (size * ldb->vm.last_vblk_seq) >> 9; for (s = skip; s < finish; s++) { /* For each sector */ - data = read_dev_sector (bdev, base + OFF_VMDB + s, §); + data = read_part_sector(state, base + OFF_VMDB + s, §); if (!data) { ldm_crit ("Disk read failed."); goto out; @@ -1474,8 +1474,7 @@ static void ldm_free_vblks (struct list_head *lh) /** * ldm_partition - Find out whether a device is a dynamic disk and handle it - * @pp: List of the partitions parsed so far - * @bdev: Device holding the LDM Database + * @state: Partition check state including device holding the LDM Database * * This determines whether the device @bdev is a dynamic disk and if so creates * the partitions necessary in the gendisk structure pointed to by @hd. @@ -1485,21 +1484,21 @@ static void ldm_free_vblks (struct list_head *lh) * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, * and so on: the actual data containing partitions. * - * Return: 1 Success, @bdev is a dynamic disk and we handled it - * 0 Success, @bdev is not a dynamic disk + * Return: 1 Success, @state->bdev is a dynamic disk and we handled it + * 0 Success, @state->bdev is not a dynamic disk * -1 An error occurred before enough information had been read - * Or @bdev is a dynamic disk, but it may be corrupted + * Or @state->bdev is a dynamic disk, but it may be corrupted */ -int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) +int ldm_partition(struct parsed_partitions *state) { struct ldmdb *ldb; unsigned long base; int result = -1; - BUG_ON (!pp || !bdev); + BUG_ON(!state); /* Look for signs of a Dynamic Disk */ - if (!ldm_validate_partition_table (bdev)) + if (!ldm_validate_partition_table(state)) return 0; ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); @@ -1509,15 +1508,15 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) } /* Parse and check privheads. */ - if (!ldm_validate_privheads (bdev, &ldb->ph)) + if (!ldm_validate_privheads(state, &ldb->ph)) goto out; /* Already logged */ /* All further references are relative to base (database start). */ base = ldb->ph.config_start; /* Parse and check tocs and vmdb. */ - if (!ldm_validate_tocblocks (bdev, base, ldb) || - !ldm_validate_vmdb (bdev, base, ldb)) + if (!ldm_validate_tocblocks(state, base, ldb) || + !ldm_validate_vmdb(state, base, ldb)) goto out; /* Already logged */ /* Initialize vblk lists in ldmdb struct */ @@ -1527,13 +1526,13 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) INIT_LIST_HEAD (&ldb->v_comp); INIT_LIST_HEAD (&ldb->v_part); - if (!ldm_get_vblks (bdev, base, ldb)) { + if (!ldm_get_vblks(state, base, ldb)) { ldm_crit ("Failed to read the VBLKs from the database."); goto cleanup; } /* Finally, create the data partition devices. */ - if (ldm_create_data_partitions (pp, ldb)) { + if (ldm_create_data_partitions(state, ldb)) { ldm_debug ("Parsed LDM database successfully."); result = 1; } diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 30e08e809c1..d1fb50b28d8 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -209,7 +209,7 @@ struct ldmdb { /* Cache of the database */ struct list_head v_part; }; -int ldm_partition (struct parsed_partitions *state, struct block_device *bdev); +int ldm_partition(struct parsed_partitions *state); #endif /* _FS_PT_LDM_H_ */ diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index dece9b4c881..74465ff7c26 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c @@ -27,7 +27,7 @@ static inline void mac_fix_string(char *stg, int len) stg[i] = 0; } -int mac_partition(struct parsed_partitions *state, struct block_device *bdev) +int mac_partition(struct parsed_partitions *state) { int slot = 1; Sector sect; @@ -42,7 +42,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) struct mac_driver_desc *md; /* Get 0th block and look at the first partition map entry. */ - md = (struct mac_driver_desc *) read_dev_sector(bdev, 0, §); + md = read_part_sector(state, 0, §); if (!md) return -1; if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { @@ -51,7 +51,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) } secsize = be16_to_cpu(md->block_size); put_dev_sector(sect); - data = read_dev_sector(bdev, secsize/512, §); + data = read_part_sector(state, secsize/512, §); if (!data) return -1; part = (struct mac_partition *) (data + secsize%512); @@ -64,7 +64,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) for (blk = 1; blk <= blocks_in_map; ++blk) { int pos = blk * secsize; put_dev_sector(sect); - data = read_dev_sector(bdev, pos/512, §); + data = read_part_sector(state, pos/512, §); if (!data) return -1; part = (struct mac_partition *) (data + pos%512); @@ -123,7 +123,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) } #ifdef CONFIG_PPC_PMAC if (found_root_goodness) - note_bootable_part(bdev->bd_dev, found_root, found_root_goodness); + note_bootable_part(state->bdev->bd_dev, found_root, + found_root_goodness); #endif put_dev_sector(sect); diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h index bbf26e1386f..3c7d9843638 100644 --- a/fs/partitions/mac.h +++ b/fs/partitions/mac.h @@ -41,4 +41,4 @@ struct mac_driver_desc { /* ... more stuff */ }; -int mac_partition(struct parsed_partitions *state, struct block_device *bdev); +int mac_partition(struct parsed_partitions *state); diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index efe3df75f84..15bfb7b1e04 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -64,7 +64,7 @@ msdos_magic_present(unsigned char *p) #define AIX_LABEL_MAGIC2 0xC2 #define AIX_LABEL_MAGIC3 0xD4 #define AIX_LABEL_MAGIC4 0xC1 -static int aix_magic_present(unsigned char *p, struct block_device *bdev) +static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) { struct partition *pt = (struct partition *) (p + 0x1be); Sector sect; @@ -85,7 +85,7 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) is_extended_partition(pt)) return 0; } - d = read_dev_sector(bdev, 7, §); + d = read_part_sector(state, 7, §); if (d) { if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') ret = 1; @@ -105,15 +105,14 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) * only for the actual data partitions. */ -static void -parse_extended(struct parsed_partitions *state, struct block_device *bdev, - sector_t first_sector, sector_t first_size) +static void parse_extended(struct parsed_partitions *state, + sector_t first_sector, sector_t first_size) { struct partition *p; Sector sect; unsigned char *data; sector_t this_sector, this_size; - sector_t sector_size = bdev_logical_block_size(bdev) / 512; + sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; int loopct = 0; /* number of links followed without finding a data partition */ int i; @@ -126,7 +125,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, return; if (state->next == state->limit) return; - data = read_dev_sector(bdev, this_sector, §); + data = read_part_sector(state, this_sector, §); if (!data) return; @@ -198,9 +197,8 @@ done: /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also indicates linux swap. Be careful before believing this is Solaris. */ -static void -parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_solaris_x86(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_SOLARIS_X86_PARTITION Sector sect; @@ -208,7 +206,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, int i; short max_nparts; - v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, §); + v = read_part_sector(state, offset + 1, §); if (!v) return; if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { @@ -245,16 +243,15 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ -static void -parse_bsd(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin, char *flavour, - int max_partitions) +static void parse_bsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin, char *flavour, + int max_partitions) { Sector sect; struct bsd_disklabel *l; struct bsd_partition *p; - l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, §); + l = read_part_sector(state, offset + 1, §); if (!l) return; if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { @@ -291,33 +288,28 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev, } #endif -static void -parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_freebsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL - parse_bsd(state, bdev, offset, size, origin, - "bsd", BSD_MAXPARTITIONS); + parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); #endif } -static void -parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_netbsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL - parse_bsd(state, bdev, offset, size, origin, - "netbsd", BSD_MAXPARTITIONS); + parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); #endif } -static void -parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_openbsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL - parse_bsd(state, bdev, offset, size, origin, - "openbsd", OPENBSD_MAXPARTITIONS); + parse_bsd(state, offset, size, origin, "openbsd", + OPENBSD_MAXPARTITIONS); #endif } @@ -325,16 +317,15 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, * Create devices for Unixware partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ -static void -parse_unixware(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_unixware(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_UNIXWARE_DISKLABEL Sector sect; struct unixware_disklabel *l; struct unixware_slice *p; - l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, §); + l = read_part_sector(state, offset + 29, §); if (!l) return; if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || @@ -365,9 +356,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev, * Anand Krishnamurthy <anandk@wiproge.med.ge.com> * Rajeev V. Pillai <rajeevvp@yahoo.com> */ -static void -parse_minix(struct parsed_partitions *state, struct block_device *bdev, - sector_t offset, sector_t size, int origin) +static void parse_minix(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_MINIX_SUBPARTITION Sector sect; @@ -375,7 +365,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, struct partition *p; int i; - data = read_dev_sector(bdev, offset, §); + data = read_part_sector(state, offset, §); if (!data) return; @@ -404,8 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, static struct { unsigned char id; - void (*parse)(struct parsed_partitions *, struct block_device *, - sector_t, sector_t, int); + void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); } subtypes[] = { {FREEBSD_PARTITION, parse_freebsd}, {NETBSD_PARTITION, parse_netbsd}, @@ -417,16 +406,16 @@ static struct { {0, NULL}, }; -int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) +int msdos_partition(struct parsed_partitions *state) { - sector_t sector_size = bdev_logical_block_size(bdev) / 512; + sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; Sector sect; unsigned char *data; struct partition *p; struct fat_boot_sector *fb; int slot; - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; if (!msdos_magic_present(data + 510)) { @@ -434,7 +423,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) return 0; } - if (aix_magic_present(data, bdev)) { + if (aix_magic_present(state, data)) { put_dev_sector(sect); printk( " [AIX]"); return 0; @@ -503,7 +492,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) put_partition(state, slot, start, n); printk(" <"); - parse_extended(state, bdev, start, size); + parse_extended(state, start, size); printk(" >"); continue; } @@ -532,8 +521,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) if (!subtypes[n].parse) continue; - subtypes[n].parse(state, bdev, start_sect(p)*sector_size, - nr_sects(p)*sector_size, slot); + subtypes[n].parse(state, start_sect(p) * sector_size, + nr_sects(p) * sector_size, slot); } put_dev_sector(sect); return 1; diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h index 01e5e0b6902..38c781c490b 100644 --- a/fs/partitions/msdos.h +++ b/fs/partitions/msdos.h @@ -4,5 +4,5 @@ #define MSDOS_LABEL_MAGIC 0xAA55 -int msdos_partition(struct parsed_partitions *state, struct block_device *bdev); +int msdos_partition(struct parsed_partitions *state); diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index c05c17bc5df..fc22b85d436 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c @@ -10,7 +10,7 @@ #include "check.h" #include "osf.h" -int osf_partition(struct parsed_partitions *state, struct block_device *bdev) +int osf_partition(struct parsed_partitions *state) { int i; int slot = 1; @@ -49,7 +49,7 @@ int osf_partition(struct parsed_partitions *state, struct block_device *bdev) } * label; struct d_partition * partition; - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h index 427b8eab314..20ed2315ec1 100644 --- a/fs/partitions/osf.h +++ b/fs/partitions/osf.h @@ -4,4 +4,4 @@ #define DISKLABELMAGIC (0x82564557UL) -int osf_partition(struct parsed_partitions *state, struct block_device *bdev); +int osf_partition(struct parsed_partitions *state); diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index ed5ac83fe83..43b1df9aa16 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c @@ -27,7 +27,7 @@ struct sgi_disklabel { __be32 _unused1; /* Padding */ }; -int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) +int sgi_partition(struct parsed_partitions *state) { int i, csum; __be32 magic; @@ -39,7 +39,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) struct sgi_partition *p; char b[BDEVNAME_SIZE]; - label = (struct sgi_disklabel *) read_dev_sector(bdev, 0, §); + label = read_part_sector(state, 0, §); if (!label) return -1; p = &label->partitions[0]; @@ -57,7 +57,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) } if(csum) { printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", - bdevname(bdev, b)); + bdevname(state->bdev, b)); put_dev_sector(sect); return 0; } diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h index 5d5595c0992..b9553ebdd5a 100644 --- a/fs/partitions/sgi.h +++ b/fs/partitions/sgi.h @@ -2,7 +2,7 @@ * fs/partitions/sgi.h */ -extern int sgi_partition(struct parsed_partitions *state, struct block_device *bdev); +extern int sgi_partition(struct parsed_partitions *state); #define SGI_LABEL_MAGIC 0x0be5a941 diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index c95e6a62c01..a32660e25f7 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c @@ -10,7 +10,7 @@ #include "check.h" #include "sun.h" -int sun_partition(struct parsed_partitions *state, struct block_device *bdev) +int sun_partition(struct parsed_partitions *state) { int i; __be16 csum; @@ -61,7 +61,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) int use_vtoc; int nparts; - label = (struct sun_disklabel *)read_dev_sector(bdev, 0, §); + label = read_part_sector(state, 0, §); if (!label) return -1; @@ -78,7 +78,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) csum ^= *ush--; if (csum) { printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", - bdevname(bdev, b)); + bdevname(state->bdev, b)); put_dev_sector(sect); return 0; } diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h index 7f864d1f86d..2424baa8319 100644 --- a/fs/partitions/sun.h +++ b/fs/partitions/sun.h @@ -5,4 +5,4 @@ #define SUN_LABEL_MAGIC 0xDABE #define SUN_VTOC_SANITY 0x600DDEEE -int sun_partition(struct parsed_partitions *state, struct block_device *bdev); +int sun_partition(struct parsed_partitions *state); diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 4eba27b7864..9030c864428 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c @@ -46,7 +46,7 @@ struct slice { }; -int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) +int sysv68_partition(struct parsed_partitions *state) { int i, slices; int slot = 1; @@ -55,7 +55,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) struct dkblk0 *b; struct slice *slice; - data = read_dev_sector(bdev, 0, §); + data = read_part_sector(state, 0, §); if (!data) return -1; @@ -68,7 +68,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) i = be32_to_cpu(b->dk_ios.ios_slcblk); put_dev_sector(sect); - data = read_dev_sector(bdev, i, §); + data = read_part_sector(state, i, §); if (!data) return -1; diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h index fa733f68431..bf2f5ffa97a 100644 --- a/fs/partitions/sysv68.h +++ b/fs/partitions/sysv68.h @@ -1 +1 @@ -extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev); +extern int sysv68_partition(struct parsed_partitions *state); diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index ec852c11dce..db9eef26036 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c @@ -9,7 +9,7 @@ #include "check.h" #include "ultrix.h" -int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) +int ultrix_partition(struct parsed_partitions *state) { int i; Sector sect; @@ -26,7 +26,7 @@ int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) #define PT_MAGIC 0x032957 /* Partition magic number */ #define PT_VALID 1 /* Indicates if struct is valid */ - data = read_dev_sector(bdev, (16384 - sizeof(*label))/512, §); + data = read_part_sector(state, (16384 - sizeof(*label))/512, §); if (!data) return -1; diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h index a74bf8e2d37..a3cc00b2bde 100644 --- a/fs/partitions/ultrix.h +++ b/fs/partitions/ultrix.h @@ -2,4 +2,4 @@ * fs/partitions/ultrix.h */ -int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev); +int ultrix_partition(struct parsed_partitions *state); diff --git a/fs/pipe.c b/fs/pipe.c index 37ba29ff315..d79872eba09 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/log2.h> #include <linux/mount.h> #include <linux/pipe_fs_i.h> #include <linux/uio.h> @@ -18,11 +19,18 @@ #include <linux/pagemap.h> #include <linux/audit.h> #include <linux/syscalls.h> +#include <linux/fcntl.h> #include <asm/uaccess.h> #include <asm/ioctls.h> /* + * The max size that a non-root user is allowed to grow the pipe. Can + * be set by root in /proc/sys/fs/pipe-max-pages + */ +unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; + +/* * We use a start+len construction, which provides full use of the * allocated memory. * -- Florian Coosmann (FGC) @@ -390,7 +398,7 @@ redo: if (!buf->len) { buf->ops = NULL; ops->release(pipe, buf); - curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); + curbuf = (curbuf + 1) & (pipe->buffers - 1); pipe->curbuf = curbuf; pipe->nrbufs = --bufs; do_wakeup = 1; @@ -472,7 +480,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ if (pipe->nrbufs && chars != 0) { int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & - (PIPE_BUFFERS-1); + (pipe->buffers - 1); struct pipe_buffer *buf = pipe->bufs + lastbuf; const struct pipe_buf_operations *ops = buf->ops; int offset = buf->offset + buf->len; @@ -518,8 +526,8 @@ redo1: break; } bufs = pipe->nrbufs; - if (bufs < PIPE_BUFFERS) { - int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); + if (bufs < pipe->buffers) { + int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; char *src; @@ -580,7 +588,7 @@ redo2: if (!total_len) break; } - if (bufs < PIPE_BUFFERS) + if (bufs < pipe->buffers) continue; if (filp->f_flags & O_NONBLOCK) { if (!ret) @@ -640,7 +648,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) nrbufs = pipe->nrbufs; while (--nrbufs >= 0) { count += pipe->bufs[buf].len; - buf = (buf+1) & (PIPE_BUFFERS-1); + buf = (buf+1) & (pipe->buffers - 1); } mutex_unlock(&inode->i_mutex); @@ -671,7 +679,7 @@ pipe_poll(struct file *filp, poll_table *wait) } if (filp->f_mode & FMODE_WRITE) { - mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; + mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0; /* * Most Unices do not set POLLERR for FIFOs but on Linux they * behave exactly like pipes for poll(). @@ -877,25 +885,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); if (pipe) { - init_waitqueue_head(&pipe->wait); - pipe->r_counter = pipe->w_counter = 1; - pipe->inode = inode; + pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); + if (pipe->bufs) { + init_waitqueue_head(&pipe->wait); + pipe->r_counter = pipe->w_counter = 1; + pipe->inode = inode; + pipe->buffers = PIPE_DEF_BUFFERS; + return pipe; + } + kfree(pipe); } - return pipe; + return NULL; } void __free_pipe_info(struct pipe_inode_info *pipe) { int i; - for (i = 0; i < PIPE_BUFFERS; i++) { + for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) buf->ops->release(pipe, buf); } if (pipe->tmp_page) __free_page(pipe->tmp_page); + kfree(pipe->bufs); kfree(pipe); } @@ -1094,6 +1109,89 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) } /* + * Allocate a new array of pipe buffers and copy the info over. Returns the + * pipe size if successful, or return -ERROR on error. + */ +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +{ + struct pipe_buffer *bufs; + + /* + * Must be a power-of-2 currently + */ + if (!is_power_of_2(arg)) + return -EINVAL; + + /* + * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't + * expect a lot of shrink+grow operations, just free and allocate + * again like we would do for growing. If the pipe currently + * contains more buffers than arg, then return busy. + */ + if (arg < pipe->nrbufs) + return -EBUSY; + + bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); + if (unlikely(!bufs)) + return -ENOMEM; + + /* + * The pipe array wraps around, so just start the new one at zero + * and adjust the indexes. + */ + if (pipe->nrbufs) { + const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); + const unsigned int head = pipe->nrbufs - tail; + + if (head) + memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); + if (tail) + memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); + } + + pipe->curbuf = 0; + kfree(pipe->bufs); + pipe->bufs = bufs; + pipe->buffers = arg; + return arg; +} + +long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pipe_inode_info *pipe; + long ret; + + pipe = file->f_path.dentry->d_inode->i_pipe; + if (!pipe) + return -EBADF; + + mutex_lock(&pipe->inode->i_mutex); + + switch (cmd) { + case F_SETPIPE_SZ: + if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) + return -EINVAL; + /* + * The pipe needs to be at least 2 pages large to + * guarantee POSIX behaviour. + */ + if (arg < 2) + return -EINVAL; + ret = pipe_set_size(pipe, arg); + break; + case F_GETPIPE_SZ: + ret = pipe->buffers; + break; + default: + ret = -EINVAL; + break; + } + + mutex_unlock(&pipe->inode->i_mutex); + return ret; +} + +/* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need * any operations on the root directory. However, we need a non-trivial diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1d9c12714c5..9977df9f3a5 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp, barrier_done = reiserfs_commit_for_inode(inode); reiserfs_write_unlock(inode->i_sb); if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); if (barrier_done < 0) return barrier_done; return (err < 0) ? -EIO : 0; diff --git a/fs/splice.c b/fs/splice.c index 9313b6124a2..ac22b00d86c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, break; } - if (pipe->nrbufs < PIPE_BUFFERS) { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); + if (pipe->nrbufs < pipe->buffers) { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); struct pipe_buffer *buf = pipe->bufs + newbuf; buf->page = spd->pages[page_nr]; @@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, if (!--spd->nr_pages) break; - if (pipe->nrbufs < PIPE_BUFFERS) + if (pipe->nrbufs < pipe->buffers) continue; break; @@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) page_cache_release(spd->pages[i]); } +/* + * Check if we need to grow the arrays holding pages and partial page + * descriptions. + */ +int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +{ + if (pipe->buffers <= PIPE_DEF_BUFFERS) + return 0; + + spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + + if (spd->pages && spd->partial) + return 0; + + kfree(spd->pages); + kfree(spd->partial); + return -ENOMEM; +} + +void splice_shrink_spd(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + if (pipe->buffers <= PIPE_DEF_BUFFERS) + return; + + kfree(spd->pages); + kfree(spd->partial); +} + static int __generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, @@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, { struct address_space *mapping = in->f_mapping; unsigned int loff, nr_pages, req_pages; - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; struct page *page; pgoff_t index, end_index; loff_t isize; @@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, .spd_release = spd_release_page, }; + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); + nr_pages = min(req_pages, pipe->buffers); /* * Lookup the (hopefully) full range of pages we need. */ - spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); + spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); index += spd.nr_pages; /* @@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unlock_page(page); } - pages[spd.nr_pages++] = page; + spd.pages[spd.nr_pages++] = page; index++; } @@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * this_len is the max we'll use from this page */ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); - page = pages[page_nr]; + page = spd.pages[page_nr]; if (PageReadahead(page)) page_cache_async_readahead(mapping, &in->f_ra, in, @@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, error = -ENOMEM; break; } - page_cache_release(pages[page_nr]); - pages[page_nr] = page; + page_cache_release(spd.pages[page_nr]); + spd.pages[page_nr] = page; } /* * page was already under io and is now done, great @@ -451,8 +484,8 @@ fill_it: len = this_len; } - partial[page_nr].offset = loff; - partial[page_nr].len = this_len; + spd.partial[page_nr].offset = loff; + spd.partial[page_nr].len = this_len; len -= this_len; loff = 0; spd.nr_pages++; @@ -464,12 +497,13 @@ fill_it: * we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) - page_cache_release(pages[page_nr++]); + page_cache_release(spd.pages[page_nr++]); in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; if (spd.nr_pages) - return splice_to_pipe(pipe, &spd); + error = splice_to_pipe(pipe, &spd); + splice_shrink_spd(pipe, &spd); return error; } @@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, unsigned int nr_pages; unsigned int nr_freed; size_t offset; - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; - struct iovec vec[PIPE_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; pgoff_t index; ssize_t res; size_t this_len; @@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, .spd_release = spd_release_page, }; + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + res = -ENOMEM; + vec = __vec; + if (pipe->buffers > PIPE_DEF_BUFFERS) { + vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (!vec) + goto shrink_ret; + } + index = *ppos >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { + for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { struct page *page; page = alloc_page(GFP_USER); @@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); vec[i].iov_base = (void __user *) page_address(page); vec[i].iov_len = this_len; - pages[i] = page; + spd.pages[i] = page; spd.nr_pages++; len -= this_len; offset = 0; @@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, nr_freed = 0; for (i = 0; i < spd.nr_pages; i++) { this_len = min_t(size_t, vec[i].iov_len, res); - partial[i].offset = 0; - partial[i].len = this_len; + spd.partial[i].offset = 0; + spd.partial[i].len = this_len; if (!this_len) { - __free_page(pages[i]); - pages[i] = NULL; + __free_page(spd.pages[i]); + spd.pages[i] = NULL; nr_freed++; } res -= this_len; @@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, if (res > 0) *ppos += res; +shrink_ret: + if (vec != __vec) + kfree(vec); + splice_shrink_spd(pipe, &spd); return res; err: for (i = 0; i < spd.nr_pages; i++) - __free_page(pages[i]); + __free_page(spd.pages[i]); - return error; + res = error; + goto shrink_ret; } EXPORT_SYMBOL(default_file_splice_read); @@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, if (!buf->len) { buf->ops = NULL; ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; if (pipe->inode) sd->need_wakeup = true; @@ -1211,7 +1261,7 @@ out_release: * If we did an incomplete transfer we must release * the pipe buffers in question: */ - for (i = 0; i < PIPE_BUFFERS; i++) { + for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) { @@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, */ static int get_iovec_page_array(const struct iovec __user *iov, unsigned int nr_vecs, struct page **pages, - struct partial_page *partial, int aligned) + struct partial_page *partial, int aligned, + unsigned int pipe_buffers) { int buffers = 0, error = 0; @@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, break; npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (npages > PIPE_BUFFERS - buffers) - npages = PIPE_BUFFERS - buffers; + if (npages > pipe_buffers - buffers) + npages = pipe_buffers - buffers; error = get_user_pages_fast((unsigned long)base, npages, 0, &pages[buffers]); @@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, * or if we mapped the max number of pages that we have * room for. */ - if (error < npages || buffers == PIPE_BUFFERS) + if (error < npages || buffers == pipe_buffers) break; nr_vecs--; @@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, }; + long ret; pipe = pipe_info(file->f_path.dentry->d_inode); if (!pipe) return -EBADF; - spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, - flags & SPLICE_F_GIFT); + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, + spd.partial, flags & SPLICE_F_GIFT, + pipe->buffers); if (spd.nr_pages <= 0) - return spd.nr_pages; + ret = spd.nr_pages; + else + ret = splice_to_pipe(pipe, &spd); - return splice_to_pipe(pipe, &spd); + splice_shrink_spd(pipe, &spd); + return ret; } /* @@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check ->nrbufs without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (pipe->nrbufs < PIPE_BUFFERS) + if (pipe->nrbufs < pipe->buffers) return 0; ret = 0; pipe_lock(pipe); - while (pipe->nrbufs >= PIPE_BUFFERS) { + while (pipe->nrbufs >= pipe->buffers) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; @@ -1810,7 +1869,7 @@ retry: * Cannot make any progress, because either the input * pipe is empty or the output pipe is full. */ - if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { + if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { /* Already processed some buffers, break */ if (ret) break; @@ -1831,7 +1890,7 @@ retry: } ibuf = ipipe->bufs + ipipe->curbuf; - nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; + nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); obuf = opipe->bufs + nbuf; if (len >= ibuf->len) { @@ -1841,7 +1900,7 @@ retry: *obuf = *ibuf; ibuf->ops = NULL; opipe->nrbufs++; - ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; + ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); ipipe->nrbufs--; input_wakeup = true; } else { @@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, * If we have iterated all input buffers or ran out of * output room, break. */ - if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) + if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) break; - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); - nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); + ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); + nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); /* * Get a reference to this pipe buffer, diff --git a/fs/sync.c b/fs/sync.c index 5a537ccd2e8..e8cbd415e50 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (wait) sync_inodes_sb(sb); else - writeback_inodes_sb(sb); + writeback_inodes_sb_locked(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e9002513e08..f24dbe5efde 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -725,7 +725,8 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, NULL); + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); } STATIC void |