From bfcd17a6c5529bc37234cfa720a047cf9397bcfc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 6 Aug 2008 15:12:22 +0200 Subject: Configure out file locking features This patch adds the CONFIG_FILE_LOCKING option which allows to remove support for advisory locks. With this patch enabled, the flock() system call, the F_GETLK, F_SETLK and F_SETLKW operations of fcntl() and NFS support are disabled. These features are not necessarly needed on embedded systems. It allows to save ~11 Kb of kernel code and data: text data bss dec hex filename 1125436 118764 212992 1457192 163c28 vmlinux.old 1114299 118564 212992 1445855 160fdf vmlinux -11137 -200 0 -11337 -2C49 +/- This patch has originally been written by Matt Mackall , and is part of the Linux Tiny project. Signed-off-by: Thomas Petazzoni Signed-off-by: Matt Mackall Cc: matthew@wil.cx Cc: linux-fsdevel@vger.kernel.org Cc: mpm@selenic.com Cc: akpm@linux-foundation.org Signed-off-by: J. Bruce Fields --- include/linux/fs.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..9f540165a07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,6 +983,13 @@ struct file_lock { #include +extern void send_sigio(struct fown_struct *fown, int fd, int band); + +/* fs/sync.c */ +extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, + loff_t endbyte, unsigned int flags); + +#ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock __user *); @@ -993,14 +1000,9 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 __user *); #endif -extern void send_sigio(struct fown_struct *fown, int fd, int band); extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); -/* fs/sync.c */ -extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags); - /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); @@ -1023,6 +1025,37 @@ extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); extern struct seq_operations locks_seq_operations; +#else /* !CONFIG_FILE_LOCKING */ +#define fcntl_getlk(a, b) ({ -EINVAL; }) +#define fcntl_setlk(a, b, c, d) ({ -EACCES; }) +#if BITS_PER_LONG == 32 +#define fcntl_getlk64(a, b) ({ -EINVAL; }) +#define fcntl_setlk64(a, b, c, d) ({ -EACCES; }) +#endif +#define fcntl_setlease(a, b, c) ({ 0; }) +#define fcntl_getlease(a) ({ 0; }) +#define locks_init_lock(a) ({ }) +#define __locks_copy_lock(a, b) ({ }) +#define locks_copy_lock(a, b) ({ }) +#define locks_remove_posix(a, b) ({ }) +#define locks_remove_flock(a) ({ }) +#define posix_test_lock(a, b) ({ 0; }) +#define posix_lock_file(a, b, c) ({ -ENOLCK; }) +#define posix_lock_file_wait(a, b) ({ -ENOLCK; }) +#define posix_unblock_lock(a, b) (-ENOENT) +#define vfs_test_lock(a, b) ({ 0; }) +#define vfs_lock_file(a, b, c, d) (-ENOLCK) +#define vfs_cancel_lock(a, b) ({ 0; }) +#define flock_lock_file_wait(a, b) ({ -ENOLCK; }) +#define __break_lease(a, b) ({ 0; }) +#define lease_get_mtime(a, b) ({ }) +#define generic_setlease(a, b, c) ({ -EINVAL; }) +#define vfs_setlease(a, b, c) ({ -EINVAL; }) +#define lease_modify(a, b) ({ -EINVAL; }) +#define lock_may_read(a, b, c) ({ 1; }) +#define lock_may_write(a, b, c) ({ 1; }) +#endif /* !CONFIG_FILE_LOCKING */ + struct fasync_struct { int magic; @@ -1554,9 +1587,12 @@ extern int vfs_statfs(struct dentry *, struct kstatfs *); /* /sys/fs */ extern struct kobject *fs_kobj; +extern int rw_verify_area(int, struct file *, loff_t *, size_t); + #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 +#ifdef CONFIG_FILE_LOCKING extern int locks_mandatory_locked(struct inode *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); @@ -1587,8 +1623,6 @@ static inline int locks_verify_locked(struct inode *inode) return 0; } -extern int rw_verify_area(int, struct file *, loff_t *, size_t); - static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) @@ -1609,6 +1643,15 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return __break_lease(inode, mode); return 0; } +#else /* !CONFIG_FILE_LOCKING */ +#define locks_mandatory_locked(a) ({ 0; }) +#define locks_mandatory_area(a, b, c, d, e) ({ 0; }) +#define __mandatory_lock(a) ({ 0; }) +#define mandatory_lock(a) ({ 0; }) +#define locks_verify_locked(a) ({ 0; }) +#define locks_verify_truncate(a, b, c) ({ 0; }) +#define break_lease(a, b) ({ 0; }) +#endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ -- cgit v1.2.3-70-g09d2 From af558e33bedab672f5cfd3260bce7445e353fe21 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Sep 2007 12:34:25 -0400 Subject: nfsd: common grace period control Rewrite grace period code to unify management of grace period across lockd and nfsd. The current code has lockd and nfsd cooperate to compute a grace period which is satisfactory to them both, and then individually enforce it. This creates a slight race condition, since the enforcement is not coordinated. It's also more complicated than necessary. Here instead we have lockd and nfsd each inform common code when they enter the grace period, and when they're ready to leave the grace period, and allow normal locking only after both of them are ready to leave. We also expect the locks_start_grace()/locks_end_grace() interface here to be simpler to build on for future cluster/high-availability work, which may require (for example) putting individual filesystems into grace, or enforcing grace periods across multiple cluster nodes. Signed-off-by: J. Bruce Fields --- fs/lockd/Makefile | 2 +- fs/lockd/grace.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++ fs/lockd/svc.c | 20 ++++------------ fs/lockd/svc4proc.c | 12 +++++----- fs/lockd/svcproc.c | 12 +++++----- fs/nfsd/lockd.c | 1 - fs/nfsd/nfs4proc.c | 8 +++---- fs/nfsd/nfs4state.c | 34 ++++++++++++-------------- include/linux/fs.h | 8 +++++++ include/linux/lockd/bind.h | 9 ------- 10 files changed, 104 insertions(+), 61 deletions(-) create mode 100644 fs/lockd/grace.c (limited to 'include/linux/fs.h') diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 7725a0a9a55..97f6073ab33 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_LOCKD) += lockd.o lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ - svcproc.o svcsubs.o mon.o xdr.o + svcproc.o svcsubs.o mon.o xdr.o grace.o lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o lockd-objs := $(lockd-objs-y) diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c new file mode 100644 index 00000000000..183cc1f0af1 --- /dev/null +++ b/fs/lockd/grace.c @@ -0,0 +1,59 @@ +/* + * Common code for control of lockd and nfsv4 grace periods. + */ + +#include +#include + +static LIST_HEAD(grace_list); +static DEFINE_SPINLOCK(grace_lock); + +/** + * locks_start_grace + * @lm: who this grace period is for + * + * A grace period is a period during which locks should not be given + * out. Currently grace periods are only enforced by the two lock + * managers (lockd and nfsd), using the locks_in_grace() function to + * check when they are in a grace period. + * + * This function is called to start a grace period. + */ +void locks_start_grace(struct lock_manager *lm) +{ + spin_lock(&grace_lock); + list_add(&lm->list, &grace_list); + spin_unlock(&grace_lock); +} +EXPORT_SYMBOL_GPL(locks_start_grace); + +/** + * locks_end_grace + * @lm: who this grace period is for + * + * Call this function to state that the given lock manager is ready to + * resume regular locking. The grace period will not end until all lock + * managers that called locks_start_grace() also call locks_end_grace(). + * Note that callers count on it being safe to call this more than once, + * and the second call should be a no-op. + */ +void locks_end_grace(struct lock_manager *lm) +{ + spin_lock(&grace_lock); + list_del_init(&lm->list); + spin_unlock(&grace_lock); +} +EXPORT_SYMBOL_GPL(locks_end_grace); + +/** + * locks_in_grace + * + * Lock managers call this function to determine when it is OK for them + * to answer ordinary lock requests, and when they should accept only + * lock reclaims. + */ +int locks_in_grace(void) +{ + return !list_empty(&grace_list); +} +EXPORT_SYMBOL_GPL(locks_in_grace); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f345ef7fb8a..f013aed1153 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; -int nlmsvc_grace_period; unsigned long nlmsvc_timeout; /* @@ -85,30 +84,21 @@ static unsigned long get_lockd_grace_period(void) return nlm_timeout * 5 * HZ; } -unsigned long get_nfs_grace_period(void) -{ - unsigned long lockdgrace = get_lockd_grace_period(); - unsigned long nfsdgrace = 0; - - if (nlmsvc_ops) - nfsdgrace = nlmsvc_ops->get_grace_period(); - - return max(lockdgrace, nfsdgrace); -} -EXPORT_SYMBOL(get_nfs_grace_period); +static struct lock_manager lockd_manager = { +}; static void grace_ender(struct work_struct *not_used) { - nlmsvc_grace_period = 0; + locks_end_grace(&lockd_manager); } static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); static void set_grace_period(void) { - unsigned long grace_period = get_nfs_grace_period() + jiffies; + unsigned long grace_period = get_lockd_grace_period(); - nlmsvc_grace_period = 1; + locks_start_grace(&lockd_manager); cancel_delayed_work_sync(&grace_period_end); schedule_delayed_work(&grace_period_end, grace_period); } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a714f64515..7ca617367b3 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -89,7 +89,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -123,7 +123,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -169,7 +169,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -202,7 +202,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -341,7 +341,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -374,7 +374,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76262c1986f..1b013e19880 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -118,7 +118,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -153,7 +153,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -199,7 +199,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -232,7 +232,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -373,7 +373,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -406,7 +406,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6faeec77..b2786a5f9af 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) static struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ - .get_grace_period = get_nfs4_grace_period, }; void diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e5b51ffafc6..669461e291a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags + if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a2667..0cc7ff5d5ab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */ static time_t user_lease_time = 90; static time_t boot_time; -static int in_grace = 1; static u32 current_ownerid = 1; static u32 current_fileid = 1; static u32 current_delegid = 1; @@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta case NFS4_OPEN_CLAIM_NULL: /* Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (nfs4_in_grace()) + if (locks_in_grace()) goto out; if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) goto out; @@ -1816,12 +1815,15 @@ out: return status; } +struct lock_manager nfsd4_manager = { +}; + static void -end_grace(void) +nfsd4_end_grace(void) { dprintk("NFSD: end of grace period\n"); nfsd4_recdir_purge_old(); - in_grace = 0; + locks_end_grace(&nfsd4_manager); } static time_t @@ -1838,8 +1840,8 @@ nfs4_laundromat(void) nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - if (in_grace) - end_grace(); + if (locks_in_grace()) + nfsd4_end_grace(); list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) return nfserr_bad_stateid; else if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (nfs4_in_grace()) { + else if (locks_in_grace()) { /* Answer in remaining cases depends on existance of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) static inline int io_during_grace_disallowed(struct inode *inode, int flags) { - return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) + return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) && mandatory_lock(inode); } @@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, filp = lock_stp->st_vfs_file; status = nfserr_grace; - if (nfs4_in_grace() && !lock->lk_reclaim) + if (locks_in_grace() && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && lock->lk_reclaim) + if (!locks_in_grace() && lock->lk_reclaim) goto out; locks_init_lock(&file_lock); @@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int error; __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) @@ -3192,9 +3194,9 @@ __nfs4_state_start(void) unsigned long grace_time; boot_time = get_seconds(); - grace_time = get_nfs_grace_period(); + grace_time = get_nfs4_grace_period(); lease_time = user_lease_time; - in_grace = 1; + locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); laundry_wq = create_singlethread_workqueue("nfsd4"); @@ -3213,12 +3215,6 @@ nfs4_state_start(void) return; } -int -nfs4_in_grace(void) -{ - return in_grace; -} - time_t nfs4_lease_time(void) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f540165a07..27cfa723b92 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -942,6 +942,14 @@ struct lock_manager_operations { int (*fl_change)(struct file_lock **, int); }; +struct lock_manager { + struct list_head list; +}; + +void locks_start_grace(struct lock_manager *); +void locks_end_grace(struct lock_manager *); +int locks_in_grace(void); + /* that will die - we need it for nfs_lock_info */ #include diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 3d25bcd139d..1f0465c374d 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -27,7 +27,6 @@ struct nlmsvc_binding { struct nfs_fh *, struct file **); void (*fclose)(struct file *); - unsigned long (*get_grace_period)(void); }; extern struct nlmsvc_binding * nlmsvc_ops; @@ -56,12 +55,4 @@ extern int nlmclnt_proc(struct nlm_host *host, int cmd, extern int lockd_up(int proto); extern void lockd_down(void); -unsigned long get_nfs_grace_period(void); - -#ifdef CONFIG_NFSD_V4 -unsigned long get_nfs4_grace_period(void); -#else -static inline unsigned long get_nfs4_grace_period(void) {return 0;} -#endif - #endif /* LINUX_LOCKD_BIND_H */ -- cgit v1.2.3-70-g09d2 From c4b929b85bdb64afacbbf6453b1f2bf7e14c9e89 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 8 Oct 2008 19:44:18 -0400 Subject: vfs: vfs-level fiemap interface Basic vfs-level fiemap infrastructure, which sets up a new ->fiemap inode operation. Userspace can get extent information on a file via fiemap ioctl. As input, the fiemap ioctl takes a struct fiemap which includes an array of struct fiemap_extent (fm_extents). Size of the extent array is passed as fm_extent_count and number of extents returned will be written into fm_mapped_extents. Offset and length fields on the fiemap structure (fm_start, fm_length) describe a logical range which will be searched for extents. All extents returned will at least partially contain this range. The actual extent offsets and ranges returned will be unmodified from their offset and range on-disk. The fiemap ioctl returns '0' on success. On error, -1 is returned and errno is set. If errno is equal to EBADR, then fm_flags will contain those flags which were passed in which the kernel did not understand. On all other errors, the contents of fm_extents is undefined. As fiemap evolved, there have been many authors of the vfs patch. As far as I can tell, the list includes: Kalpak Shah Andreas Dilger Eric Sandeen Mark Fasheh Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" Cc: Michael Kerrisk Cc: linux-api@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org --- Documentation/filesystems/fiemap.txt | 228 +++++++++++++++++++++++++++++++++++ fs/ioctl.c | 155 ++++++++++++++++++++++++ include/linux/fiemap.h | 64 ++++++++++ include/linux/fs.h | 18 +++ 4 files changed, 465 insertions(+) create mode 100644 Documentation/filesystems/fiemap.txt create mode 100644 include/linux/fiemap.h (limited to 'include/linux/fs.h') diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 00000000000..1e3defcfe50 --- /dev/null +++ b/Documentation/filesystems/fiemap.txt @@ -0,0 +1,228 @@ +============ +Fiemap Ioctl +============ + +The fiemap ioctl is an efficient method for userspace to get file +extent mappings. Instead of block-by-block mapping (such as bmap), fiemap +returns a list of extents. + + +Request Basics +-------------- + +A fiemap request is encoded within struct fiemap: + +struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace cares about (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents; /* number of extents that were + * mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ +}; + + +fm_start, and fm_length specify the logical range within the file +which the process would like mappings for. Extents returned mirror +those on disk - that is, the logical offset of the 1st returned extent +may start before fm_start, and the range covered by the last returned +extent may end after fm_length. All offsets and lengths are in bytes. + +Certain flags to modify the way in which mappings are looked up can be +set in fm_flags. If the kernel doesn't understand some particular +flags, it will return EBADR and the contents of fm_flags will contain +the set of flags which caused the error. If the kernel is compatible +with all flags passed, the contents of fm_flags will be unmodified. +It is up to userspace to determine whether rejection of a particular +flag is fatal to it's operation. This scheme is intended to allow the +fiemap interface to grow in the future but without losing +compatibility with old software. + +fm_extent_count specifies the number of elements in the fm_extents[] array +that can be used to return extents. If fm_extent_count is zero, then the +fm_extents[] array is ignored (no extents will be returned), and the +fm_mapped_extents count will hold the number of extents needed in +fm_extents[] to hold the file's current mapping. Note that there is +nothing to prevent the file from changing between calls to FIEMAP. + +The following flags can be set in fm_flags: + +* FIEMAP_FLAG_SYNC +If this flag is set, the kernel will sync the file before mapping extents. + +* FIEMAP_FLAG_XATTR +If this flag is set, the extents returned will describe the inodes +extended attribute lookup tree, instead of it's data tree. + + +Extent Mapping +-------------- + +Extent information is returned within the embedded fm_extents array +which userspace must allocate along with the fiemap structure. The +number of elements in the fiemap_extents[] array should be passed via +fm_extent_count. The number of extents mapped by kernel will be +returned via fm_mapped_extents. If the number of fiemap_extents +allocated is less than would be required to map the requested range, +the maximum number of extents that can be mapped in the fm_extent[] +array will be returned and fm_mapped_extents will be equal to +fm_extent_count. In that case, the last extent in the array will not +complete the requested range and will not have the FIEMAP_EXTENT_LAST +flag set (see the next section on extent flags). + +Each extent is described by a single fiemap_extent structure as +returned in fm_extents. + +struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent */ + __u64 fe_length; /* length in bytes for the extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; +}; + +All offsets and lengths are in bytes and mirror those on disk. It is valid +for an extents logical offset to start before the request or it's logical +length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is +returned, fe_logical, fe_physical, and fe_length will be aligned to the +block size of the file system. With the exception of extents flagged as +FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. + +The fe_flags field contains flags which describe the extent returned. +A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in +the file so that the process making fiemap calls can determine when no +more extents are available, without having to call the ioctl again. + +Some flags are intentionally vague and will always be set in the +presence of other more specific flags. This way a program looking for +a general property does not have to know all existing and future flags +which imply that property. + +For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL +are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking +for inline or tail-packed data can key on the specific flag. Software +which simply cares not to try operating on non-aligned extents +however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to +worry about all present and future flags which might imply unaligned +data. Note that the opposite is not true - it would be valid for +FIEMAP_EXTENT_NOT_ALIGNED to appear alone. + +* FIEMAP_EXTENT_LAST +This is the last extent in the file. A mapping attempt past this +extent will return nothing. + +* FIEMAP_EXTENT_UNKNOWN +The location of this extent is currently unknown. This may indicate +the data is stored on an inaccessible volume or that no storage has +been allocated for the file yet. + +* FIEMAP_EXTENT_DELALLOC + - This will also set FIEMAP_EXTENT_UNKNOWN. +Delayed allocation - while there is data for this extent, it's +physical location has not been allocated yet. + +* FIEMAP_EXTENT_ENCODED +This extent does not consist of plain filesystem blocks but is +encoded (e.g. encrypted or compressed). Reading the data in this +extent via I/O to the block device will have undefined results. + +Note that it is *always* undefined to try to update the data +in-place by writing to the indicated location without the +assistance of the filesystem, or to access the data using the +information returned by the FIEMAP interface while the filesystem +is mounted. In other words, user applications may only read the +extent data via I/O to the block device while the filesystem is +unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is +clear; user applications must not try reading or writing to the +filesystem via the block device under any other circumstances. + +* FIEMAP_EXTENT_DATA_ENCRYPTED + - This will also set FIEMAP_EXTENT_ENCODED +The data in this extent has been encrypted by the file system. + +* FIEMAP_EXTENT_NOT_ALIGNED +Extent offsets and length are not guaranteed to be block aligned. + +* FIEMAP_EXTENT_DATA_INLINE + This will also set FIEMAP_EXTENT_NOT_ALIGNED +Data is located within a meta data block. + +* FIEMAP_EXTENT_DATA_TAIL + This will also set FIEMAP_EXTENT_NOT_ALIGNED +Data is packed into a block with data from other files. + +* FIEMAP_EXTENT_UNWRITTEN +Unwritten extent - the extent is allocated but it's data has not been +initialized. This indicates the extent's data will be all zero if read +through the filesystem but the contents are undefined if read directly from +the device. + +* FIEMAP_EXTENT_MERGED +This will be set when a file does not support extents, i.e., it uses a block +based addressing scheme. Since returning an extent for each block back to +userspace would be highly inefficient, the kernel will try to merge most +adjacent blocks into 'extents'. + + +VFS -> File System Implementation +--------------------------------- + +File systems wishing to support fiemap must implement a ->fiemap callback on +their inode_operations structure. The fs ->fiemap call is responsible for +defining it's set of supported fiemap flags, and calling a helper function on +each discovered extent: + +struct inode_operations { + ... + + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); + +->fiemap is passed struct fiemap_extent_info which describes the +fiemap request: + +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ +}; + +It is intended that the file system should not need to access any of this +structure directly. + + +Flag checking should be done at the beginning of the ->fiemap callback via the +fiemap_check_flags() helper: + +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The +set of fiemap flags which the fs understands should be passed via fs_flags. If +fiemap_check_flags finds invalid user flags, it will place the bad values in +fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from +fiemap_check_flags(), it should immediately exit, returning that error back to +ioctl_fiemap(). + + +For each extent in the request range, the file system should call +the helper function, fiemap_fill_next_extent(): + +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags, u32 dev); + +fiemap_fill_next_extent() will use the passed values to populate the +next free extent in the fm_extents array. 'General' extent flags will +automatically be set from specific flags on behalf of the calling file +system so that the userspace API is not broken. + +fiemap_fill_next_extent() returns 0 on success, and 1 when the +user-supplied fm_extents array is full. If an error is encountered +while copying the extent to user memory, -EFAULT will be returned. diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d..045d9601fbb 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -16,6 +16,9 @@ #include +/* So that the fiemap access checks can't overflow on 32 bit machines. */ +#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) + /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on @@ -71,6 +74,156 @@ static int ioctl_fibmap(struct file *filp, int __user *p) return put_user(res, p); } +/** + * fiemap_fill_next_extent - Fiemap helper function + * @fieinfo: Fiemap context passed into ->fiemap + * @logical: Extent logical start offset, in bytes + * @phys: Extent physical start offset, in bytes + * @len: Extent length, in bytes + * @flags: FIEMAP_EXTENT flags that describe this extent + * + * Called from file system ->fiemap callback. Will populate extent + * info as passed in via arguments and copy to user memory. On + * success, extent count on fieinfo is incremented. + * + * Returns 0 on success, -errno on error, 1 if this was the last + * extent that will fit in user array. + */ +#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) +#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) +#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) +int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, + u64 phys, u64 len, u32 flags) +{ + struct fiemap_extent extent; + struct fiemap_extent *dest = fieinfo->fi_extents_start; + + /* only count the extents */ + if (fieinfo->fi_extents_max == 0) { + fieinfo->fi_extents_mapped++; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; + } + + if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) + return 1; + + if (flags & SET_UNKNOWN_FLAGS) + flags |= FIEMAP_EXTENT_UNKNOWN; + if (flags & SET_NO_UNMOUNTED_IO_FLAGS) + flags |= FIEMAP_EXTENT_ENCODED; + if (flags & SET_NOT_ALIGNED_FLAGS) + flags |= FIEMAP_EXTENT_NOT_ALIGNED; + + memset(&extent, 0, sizeof(extent)); + extent.fe_logical = logical; + extent.fe_physical = phys; + extent.fe_length = len; + extent.fe_flags = flags; + + dest += fieinfo->fi_extents_mapped; + if (copy_to_user(dest, &extent, sizeof(extent))) + return -EFAULT; + + fieinfo->fi_extents_mapped++; + if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) + return 1; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +} +EXPORT_SYMBOL(fiemap_fill_next_extent); + +/** + * fiemap_check_flags - check validity of requested flags for fiemap + * @fieinfo: Fiemap context passed into ->fiemap + * @fs_flags: Set of fiemap flags that the file system understands + * + * Called from file system ->fiemap callback. This will compute the + * intersection of valid fiemap flags and those that the fs supports. That + * value is then compared against the user supplied flags. In case of bad user + * flags, the invalid values will be written into the fieinfo structure, and + * -EBADR is returned, which tells ioctl_fiemap() to return those values to + * userspace. For this reason, a return code of -EBADR should be preserved. + * + * Returns 0 on success, -EBADR on bad flags. + */ +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) +{ + u32 incompat_flags; + + incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); + if (incompat_flags) { + fieinfo->fi_flags = incompat_flags; + return -EBADR; + } + return 0; +} +EXPORT_SYMBOL(fiemap_check_flags); + +static int fiemap_check_ranges(struct super_block *sb, + u64 start, u64 len, u64 *new_len) +{ + *new_len = len; + + if (len == 0) + return -EINVAL; + + if (start > sb->s_maxbytes) + return -EFBIG; + + /* + * Shrink request scope to what the fs can actually handle. + */ + if ((len > sb->s_maxbytes) || + (sb->s_maxbytes - len) < start) + *new_len = sb->s_maxbytes - start; + + return 0; +} + +static int ioctl_fiemap(struct file *filp, unsigned long arg) +{ + struct fiemap fiemap; + struct fiemap_extent_info fieinfo = { 0, }; + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + u64 len; + int error; + + if (!inode->i_op->fiemap) + return -EOPNOTSUPP; + + if (copy_from_user(&fiemap, (struct fiemap __user *)arg, + sizeof(struct fiemap))) + return -EFAULT; + + if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) + return -EINVAL; + + error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, + &len); + if (error) + return error; + + fieinfo.fi_flags = fiemap.fm_flags; + fieinfo.fi_extents_max = fiemap.fm_extent_count; + fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); + + if (fiemap.fm_extent_count != 0 && + !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, + fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) + return -EFAULT; + + if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) + filemap_write_and_wait(inode->i_mapping); + + error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); + fiemap.fm_flags = fieinfo.fi_flags; + fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; + if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) + error = -EFAULT; + + return error; +} + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -80,6 +233,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); + case FS_IOC_FIEMAP: + return ioctl_fiemap(filp, arg); case FIGETBSZ: return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 00000000000..671decbd2ae --- /dev/null +++ b/include/linux/fiemap.h @@ -0,0 +1,64 @@ +/* + * FS_IOC_FIEMAP ioctl infrastructure. + * + * Some portions copyright (C) 2007 Cluster File Systems, Inc + * + * Authors: Mark Fasheh + * Kalpak Shah + * Andreas Dilger + */ + +#ifndef _LINUX_FIEMAP_H +#define _LINUX_FIEMAP_H + +struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent from the beginning of the file */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent from the beginning of the disk */ + __u64 fe_length; /* length in bytes for this extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; +}; + +struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace wants (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ +}; + +#define FIEMAP_MAX_OFFSET (~0ULL) + +#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ +#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ + +#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ +#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ +#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. + * Sets EXTENT_UNKNOWN. */ +#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read + * while fs is unmounted */ +#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. + * Sets EXTENT_NO_BYPASS. */ +#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be + * block aligned. */ +#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but + * no data (i.e. zero). */ +#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively + * support extents. Result + * merged for efficiency. */ + +#endif /* _LINUX_FIEMAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..194fb237a30 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -231,6 +231,7 @@ extern int dir_notify_enable; #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) #define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) #define FS_IOC32_GETFLAGS _IOR('f', 1, int) #define FS_IOC32_SETFLAGS _IOW('f', 2, int) #define FS_IOC32_GETVERSION _IOR('v', 1, int) @@ -291,6 +292,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -1178,6 +1180,20 @@ extern void dentry_unhash(struct dentry *dentry); */ extern int file_permission(struct file *, int); +/* + * VFS FS_IOC_FIEMAP helper definitions. + */ +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent + * array */ +}; +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + /* * File types * @@ -1287,6 +1303,8 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); }; struct seq_file; -- cgit v1.2.3-70-g09d2 From 68c9d702bb72f367f3b148963ec6cf5e07ff7f65 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 3 Oct 2008 17:32:43 -0400 Subject: generic block based fiemap implementation Any block based fs (this patch includes ext3) just has to declare its own fiemap() function and then call this generic function with its own get_block_t. This works well for block based filesystems that will map multiple contiguous blocks at one time, but will work for filesystems that only map one block at a time, you will just end up with an "extent" for each block. One gotcha is this will not play nicely where there is hole+data after the EOF. This function will assume its hit the end of the data as soon as it hits a hole after the EOF, so if there is any data past that it will not pick that up. AFAIK no block based fs does this anyway, but its in the comments of the function anyway just in case. Signed-off-by: Josef Bacik Signed-off-by: Mark Fasheh Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org --- fs/ext2/ext2.h | 2 + fs/ext2/file.c | 1 + fs/ext2/inode.c | 8 ++++ fs/ext3/file.c | 1 + fs/ext3/inode.c | 8 ++++ fs/ioctl.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ext3_fs.h | 2 + include/linux/fs.h | 3 ++ 8 files changed, 143 insertions(+) (limited to 'include/linux/fs.h') diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33..bae998c1e44 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); +extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); int __ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c3629..45ed0712218 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { #endif .setattr = ext2_setattr, .permission = ext2_permission, + .fiemap = ext2_fiemap, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51..7658b33e265 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" #include "xip.h" @@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ } +int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, + ext2_get_block); +} + static int ext2_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, ext2_get_block, wbc); diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d301..3be1e0689c9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .fiemap = ext3_fiemap, }; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b11..ebfec4d0148 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -981,6 +982,13 @@ out: return ret; } +int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, + ext3_get_block); +} + /* * `handle' can be NULL if create is zero */ diff --git a/fs/ioctl.c b/fs/ioctl.c index 045d9601fbb..33a6b7ecb8b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include @@ -224,6 +226,122 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) return error; } +#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) +#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); + +/* + * @inode - the inode to map + * @arg - the pointer to userspace where we copy everything to + * @get_block - the fs's get_block function + * + * This does FIEMAP for block based inodes. Basically it will just loop + * through get_block until we hit the number of extents we want to map, or we + * go past the end of the file and hit a hole. + * + * If it is possible to have data blocks beyond a hole past @inode->i_size, then + * please do not use this function, it will stop at the first unmapped block + * beyond i_size + */ +int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block) +{ + struct buffer_head tmp; + unsigned int start_blk; + long long length = 0, map_len = 0; + u64 logical = 0, phys = 0, size = 0; + u32 flags = FIEMAP_EXTENT_MERGED; + int ret = 0; + + if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) + return ret; + + start_blk = logical_to_blk(inode, start); + + /* guard against change */ + mutex_lock(&inode->i_mutex); + + length = (long long)min_t(u64, len, i_size_read(inode)); + map_len = length; + + do { + /* + * we set b_size to the total size we want so it will map as + * many contiguous blocks as possible at once + */ + memset(&tmp, 0, sizeof(struct buffer_head)); + tmp.b_size = map_len; + + ret = get_block(inode, start_blk, &tmp, 0); + if (ret) + break; + + /* HOLE */ + if (!buffer_mapped(&tmp)) { + /* + * first hole after going past the EOF, this is our + * last extent + */ + if (length <= 0) { + flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + break; + } + + length -= blk_to_logical(inode, 1); + + /* if we have holes up to/past EOF then we're done */ + if (length <= 0) + break; + + start_blk++; + } else { + if (length <= 0 && size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + if (ret) + break; + } + + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, tmp.b_blocknr); + size = tmp.b_size; + flags = FIEMAP_EXTENT_MERGED; + + length -= tmp.b_size; + start_blk += logical_to_blk(inode, size); + + /* + * if we are past the EOF we need to loop again to see + * if there is a hole so we can mark this extent as the + * last one, and if not keep mapping things until we + * find a hole, or we run out of slots in the extent + * array + */ + if (length <= 0) + continue; + + ret = fiemap_fill_next_extent(fieinfo, logical, phys, + size, flags); + if (ret) + break; + } + cond_resched(); + } while (1); + + mutex_unlock(&inode->i_mutex); + + /* if ret is 1 then we just hit the end of the extent array */ + if (ret == 1) + ret = 0; + + return ret; +} +EXPORT_SYMBOL(generic_block_fiemap); + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 80171ee89a2..8120fa1bc23 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); extern void ext3_set_aops(struct inode *inode); +extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, diff --git a/include/linux/fs.h b/include/linux/fs.h index 194fb237a30..385c9a197df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1998,6 +1998,9 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); +extern int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block); extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); -- cgit v1.2.3-70-g09d2 From fb2dce862d9f9a68e6b9374579056ec9eca02a63 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:01:53 +0100 Subject: Add 'discard' request handling Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io functio, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi and Jens Axboe Signed-off-by: Jens Axboe --- block/blk-barrier.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ block/blk-core.c | 28 ++++++++++++++------ block/blk-settings.c | 17 +++++++++++++ include/linux/bio.h | 8 ++++-- include/linux/blkdev.h | 16 ++++++++++++ include/linux/fs.h | 3 ++- 6 files changed, 130 insertions(+), 11 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index a09ead19f9c..273121c0eb8 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) return ret; } EXPORT_SYMBOL(blkdev_issue_flush); + +static void blkdev_discard_end_io(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + + bio_put(bio); +} + +/** + * blkdev_issue_discard - queue a discard + * @bdev: blockdev to issue discard for + * @sector: start sector + * @nr_sects: number of sectors to discard + * + * Description: + * Issue a discard request for the sectors in question. Does not wait. + */ +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, + unsigned nr_sects) +{ + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (nr_sects && !ret) { + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blkdev_discard_end_io; + bio->bi_bdev = bdev; + + bio->bi_sector = sector; + + if (nr_sects > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + nr_sects -= q->max_hw_sectors; + sector += q->max_hw_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + bio_get(bio); + submit_bio(WRITE_DISCARD, bio); + + /* Check if it failed immediately */ + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} +EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index a496727df7e..1e143c4f9d3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1079,6 +1079,10 @@ void init_request_from_bio(struct request *req, struct bio *bio) */ if (unlikely(bio_barrier(bio))) req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + if (unlikely(bio_discard(bio))) { + req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD); + req->q->prepare_discard_fn(req->q, req); + } if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; @@ -1095,7 +1099,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors, barrier, err; + int el_ret, nr_sectors, barrier, discard, err; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); int rw_flags; @@ -1115,6 +1119,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) goto end_io; } + discard = bio_discard(bio); + if (unlikely(discard) && !q->prepare_discard_fn) { + err = -EOPNOTSUPP; + goto end_io; + } + spin_lock_irq(q->queue_lock); if (unlikely(barrier) || elv_queue_empty(q)) @@ -1405,7 +1415,8 @@ end_io: if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { + if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || + (bio_discard(bio) && !q->prepare_discard_fn)) { err = -EOPNOTSUPP; goto end_io; } @@ -1487,7 +1498,6 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { - if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1881,7 +1891,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, struct request_queue *q = rq->q; unsigned long flags = 0UL; - if (bio_has_data(rq->bio)) { + if (bio_has_data(rq->bio) || blk_discard_rq(rq)) { if (__end_that_request_first(rq, error, nr_bytes)) return 1; @@ -1939,7 +1949,7 @@ EXPORT_SYMBOL_GPL(blk_end_request); **/ int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - if (bio_has_data(rq->bio) && + if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) && __end_that_request_first(rq, error, nr_bytes)) return 1; @@ -2012,12 +2022,14 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ rq->cmd_flags |= (bio->bi_rw & 3); - rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->nr_hw_segments = bio_hw_segments(q, bio); + if (bio_has_data(bio)) { + rq->nr_phys_segments = bio_phys_segments(q, bio); + rq->nr_hw_segments = bio_hw_segments(q, bio); + rq->buffer = bio_data(bio); + } rq->current_nr_sectors = bio_cur_sectors(bio); rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); - rq->buffer = bio_data(bio); rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; diff --git a/block/blk-settings.c b/block/blk-settings.c index dfc77012843..539d873c820 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) } EXPORT_SYMBOL(blk_queue_prep_rq); +/** + * blk_queue_set_discard - set a discard_sectors function for queue + * @q: queue + * @dfn: prepare_discard function + * + * It's possible for a queue to register a discard callback which is used + * to transform a discard request into the appropriate type for the + * hardware. If none is registered, then discard requests are failed + * with %EOPNOTSUPP. + * + */ +void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) +{ + q->prepare_discard_fn = dfn; +} +EXPORT_SYMBOL(blk_queue_set_discard); + /** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue diff --git a/include/linux/bio.h b/include/linux/bio.h index 17f1fbdb31b..1fdfc5621c8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -149,6 +149,8 @@ struct bio { * bit 2 -- barrier * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * bit 5 -- metadata request + * bit 6 -- discard sectors */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -156,6 +158,7 @@ struct bio { #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 #define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -186,13 +189,14 @@ struct bio { #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) static inline unsigned int bio_cur_sectors(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len >> 9; - - return 0; + else /* dataless requests such as discard */ + return bio->bi_size >> 9; } static inline void *bio_data(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0ba018f5e8..26ececbbebe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -89,6 +89,7 @@ enum { enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ @@ -111,6 +112,7 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -307,6 +310,7 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; + prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -546,6 +550,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ @@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); @@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + unsigned nr_sects); + +static inline int sb_issue_discard(struct super_block *sb, + sector_t block, unsigned nr_blocks) +{ + block <<= (sb->s_blocksize_bits - 9); + nr_blocks <<= (sb->s_blocksize_bits - 9); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); +} /* * command filter functions diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..eb013131913 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -86,7 +86,8 @@ extern int dir_notify_enable; #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3-70-g09d2 From d30a2605be9d5132d95944916e8f578fcfe4f976 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 11 Aug 2008 15:58:42 +0100 Subject: Add BLKDISCARD ioctl to allow userspace to discard sectors We may well want mkfs tools to use this to mark the whole device as unwanted before they format it, for example. The ioctl takes a pair of uint64_ts, which are start offset and length in _bytes_. Although at the moment it might make sense for them both to be in 512-byte sectors, I don't want to limit the ABI to that. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 1 + block/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + 3 files changed, 78 insertions(+) (limited to 'include/linux/fs.h') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index c23177e4623..1e559fba7bd 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); case BLKFLSBUF: case BLKROSET: + case BLKDISCARD: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us diff --git a/block/ioctl.c b/block/ioctl.c index 77185e5c026..342298bb608 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -111,6 +111,69 @@ static int blkdev_reread_part(struct block_device *bdev) return res; } +static void blk_ioc_discard_endio(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + complete(bio->bi_private); +} + +static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, + uint64_t len) +{ + struct request_queue *q = bdev_get_queue(bdev); + int ret = 0; + + if (start & 511) + return -EINVAL; + if (len & 511) + return -EINVAL; + start >>= 9; + len >>= 9; + + if (start + len > (bdev->bd_inode->i_size >> 9)) + return -EINVAL; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (len && !ret) { + DECLARE_COMPLETION_ONSTACK(wait); + struct bio *bio; + + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blk_ioc_discard_endio; + bio->bi_bdev = bdev; + bio->bi_private = &wait; + bio->bi_sector = start; + + if (len > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + len -= q->max_hw_sectors; + start += q->max_hw_sectors; + } else { + bio->bi_size = len << 9; + len = 0; + } + submit_bio(WRITE_DISCARD, bio); + + wait_for_completion(&wait); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -258,6 +321,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, set_device_ro(bdev, n); unlock_kernel(); return 0; + + case BLKDISCARD: { + uint64_t range[2]; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + return blk_ioctl_discard(bdev, range[0], range[1]); + } + case HDIO_GETGEO: { struct hd_geometry geo; diff --git a/include/linux/fs.h b/include/linux/fs.h index eb013131913..88358ca6af2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -223,6 +223,7 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3-70-g09d2 From e17fc0a1ccf88f6d4dcb363729f3141b0958c325 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 9 Aug 2008 16:42:20 +0100 Subject: Allow elevators to sort/merge discard requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit But blkdev_issue_discard() still emits requests which are interpreted as soft barriers, because naïve callers might otherwise issue subsequent writes to those same sectors, which might cross on the queue (if they're reallocated quickly enough). Callers still _can_ issue non-barrier discard requests, but they have to take care of queue ordering for themselves. Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 12 +++++++----- block/blk-merge.c | 27 +++++++++++++++++---------- block/elevator.c | 12 ++++++++++-- block/ioctl.c | 2 +- include/linux/bio.h | 2 +- include/linux/blkdev.h | 5 +++-- include/linux/fs.h | 3 ++- 8 files changed, 42 insertions(+), 23 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index e5448131d4f..988b63479b2 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -372,7 +372,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, nr_sects = 0; } bio_get(bio); - submit_bio(WRITE_DISCARD, bio); + submit_bio(DISCARD_BARRIER, bio); /* Check if it failed immediately */ if (bio_flagged(bio, BIO_EOPNOTSUPP)) diff --git a/block/blk-core.c b/block/blk-core.c index 1e143c4f9d3..1261516dd42 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1077,12 +1077,13 @@ void init_request_from_bio(struct request *req, struct bio *bio) /* * REQ_BARRIER implies no merging, but lets make it explicit */ - if (unlikely(bio_barrier(bio))) - req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (unlikely(bio_discard(bio))) { - req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD); + req->cmd_flags |= REQ_DISCARD; + if (bio_barrier(bio)) + req->cmd_flags |= REQ_SOFTBARRIER; req->q->prepare_discard_fn(req->q, req); - } + } else if (unlikely(bio_barrier(bio))) + req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; @@ -1114,7 +1115,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); barrier = bio_barrier(bio); - if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { + if (unlikely(barrier) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; goto end_io; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 5efc9e7a68b..6cf8f0c70a5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -11,7 +11,7 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) { - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { rq->hard_sector += nsect; rq->hard_nr_sectors -= nsect; @@ -131,13 +131,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) - return 0; if (bio->bi_size + nxt->bi_size > q->max_segment_size) return 0; + if (!bio_has_data(bio)) + return 1; + + if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) + return 0; + /* - * bio and nxt are contigous in memory, check if the queue allows + * bio and nxt are contiguous in memory; check if the queue allows * these two to be merged into one */ if (BIO_SEG_BOUNDARY(q, bio, nxt)) @@ -153,8 +157,9 @@ static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, blk_recount_segments(q, bio); if (!bio_flagged(nxt, BIO_SEG_VALID)) blk_recount_segments(q, nxt); - if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || - BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) + if (bio_has_data(bio) && + (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || + BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))) return 0; if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) return 0; @@ -317,8 +322,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) - && !BIOVEC_VIRT_OVERSIZE(len)) { + if (!bio_has_data(bio) || + (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) + && !BIOVEC_VIRT_OVERSIZE(len))) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { @@ -356,8 +362,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, blk_recount_segments(q, bio); if (!bio_flagged(req->bio, BIO_SEG_VALID)) blk_recount_segments(q, req->bio); - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && - !BIOVEC_VIRT_OVERSIZE(len)) { + if (!bio_has_data(bio) || + (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && + !BIOVEC_VIRT_OVERSIZE(len))) { int mergeable = ll_new_mergeable(q, req, bio); if (mergeable) { diff --git a/block/elevator.c b/block/elevator.c index ed6f8f32d27..4f5127054e3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -74,6 +74,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) if (!rq_mergeable(rq)) return 0; + /* + * Don't merge file system requests and discard requests + */ + if (bio_discard(bio) != bio_discard(rq->bio)) + return 0; + /* * different data direction or already started, don't merge */ @@ -438,6 +444,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); + if (blk_discard_rq(rq) != blk_discard_rq(pos)) + break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; if (pos->cmd_flags & stop_flags) @@ -607,7 +615,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) break; case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); + BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { @@ -692,7 +700,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, * this request is scheduling boundary, update * end_sector */ - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } diff --git a/block/ioctl.c b/block/ioctl.c index 342298bb608..375c57922b0 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -161,7 +161,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, bio->bi_size = len << 9; len = 0; } - submit_bio(WRITE_DISCARD, bio); + submit_bio(DISCARD_NOBARRIER, bio); wait_for_completion(&wait); diff --git a/include/linux/bio.h b/include/linux/bio.h index 1fdfc5621c8..33c3947d61e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -188,8 +188,8 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 727886d25c4..e9eb35c9bf2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -541,7 +541,7 @@ enum { #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) +#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) @@ -598,7 +598,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ + (blk_discard_rq(rq) || blk_fs_request((rq)))) /* * q->prep_rq_fn return values diff --git a/include/linux/fs.h b/include/linux/fs.h index 88358ca6af2..860689f541b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,7 +87,8 @@ extern int dir_notify_enable; #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) -#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) +#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) +#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3-70-g09d2 From 1f0142905d4812966831613847db38a66da29eb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:23 +0900 Subject: block: adjust formatting for large minors and add ext_range sysfs attr With extended minors and the soon-to-follow debug feature, large minor numbers for block devices will be common. This patch does the followings to make printouts pretty. * Adapt print formats such that large minors don't break the formatting. * For extended MAJ:MIN, %02x%02x for MAJ:MIN used in printk_all_partitions() doesn't cut it anymore. Update it such that %03x:%05x is used if either MAJ or MIN doesn't fit in %02x. * Implement ext_range sysfs attribute which shows total minors the device can use including both conventional minor space and the extended one. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 45 ++++++++++++++++++++++++++++++++++----------- include/linux/fs.h | 1 + 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/genhd.c b/block/genhd.c index 7bbfed05cec..ee4b13520e5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -366,6 +366,18 @@ void blk_free_devt(dev_t devt) } } +static char *bdevt_str(dev_t devt, char *buf) +{ + if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { + char tbuf[BDEVT_SIZE]; + snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); + snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); + } else + snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); + + return buf; +} + /* * Register device numbers dev..(dev+range-1) * range must be nonzero @@ -521,7 +533,8 @@ void __init printk_all_partitions(void) struct gendisk *disk = dev_to_disk(dev); struct disk_part_iter piter; struct hd_struct *part; - char buf[BDEVNAME_SIZE]; + char name_buf[BDEVNAME_SIZE]; + char devt_buf[BDEVT_SIZE]; /* * Don't show empty devices or things that have been @@ -536,10 +549,10 @@ void __init printk_all_partitions(void) * numbers in hex - the same format as the root= * option takes. */ - printk("%02x%02x %10llu %s", - MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)), + printk("%s %10llu %s", + bdevt_str(disk_devt(disk), devt_buf), (unsigned long long)get_capacity(disk) >> 1, - disk_name(disk, 0, buf)); + disk_name(disk, 0, name_buf)); if (disk->driverfs_dev != NULL && disk->driverfs_dev->driver != NULL) printk(" driver: %s\n", @@ -550,10 +563,10 @@ void __init printk_all_partitions(void) /* now show the partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - printk(" %02x%02x %10llu %s\n", - MAJOR(part_devt(part)), MINOR(part_devt(part)), + printk(" %s %10llu %s\n", + bdevt_str(part_devt(part), devt_buf), (unsigned long long)part->nr_sects >> 1, - disk_name(disk, part->partno, buf)); + disk_name(disk, part->partno, name_buf)); disk_part_iter_exit(&piter); } class_dev_iter_exit(&iter); @@ -630,14 +643,14 @@ static int show_partition(struct seq_file *seqf, void *v) return 0; /* show the full disk and all non-0 size partitions of it */ - seq_printf(seqf, "%4d %4d %10llu %s\n", + seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); disk_part_iter_init(&piter, sgp, 0); while ((part = disk_part_iter_next(&piter))) - seq_printf(seqf, "%4d %4d %10llu %s\n", + seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), (unsigned long long)part->nr_sects >> 1, disk_name(sgp, part->partno, buf)); @@ -691,6 +704,14 @@ static ssize_t disk_range_show(struct device *dev, return sprintf(buf, "%d\n", disk->minors); } +static ssize_t disk_ext_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%d\n", disk_max_parts(disk) + 1); +} + static ssize_t disk_removable_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -780,6 +801,7 @@ static ssize_t disk_fail_store(struct device *dev, #endif static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); +static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); @@ -792,6 +814,7 @@ static struct device_attribute dev_attr_fail = static struct attribute *disk_attrs[] = { &dev_attr_range.attr, + &dev_attr_ext_range.attr, &dev_attr_removable.attr, &dev_attr_ro.attr, &dev_attr_size.attr, @@ -858,7 +881,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = disk_stat_lock(); disk_round_stats(cpu, gp); disk_stat_unlock(); - seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)), disk_name(gp, 0, buf), disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), @@ -877,7 +900,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = disk_stat_lock(); part_round_stats(cpu, hd); disk_stat_unlock(); - seq_printf(seqf, "%4d %4d %s %lu %lu %llu " + seq_printf(seqf, "%4d %7d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), diff --git a/include/linux/fs.h b/include/linux/fs.h index 860689f541b..02a9fb5a830 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1685,6 +1685,7 @@ extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_HASH_SIZE 255 -- cgit v1.2.3-70-g09d2 From 0c002c2f74e10baa9021d3ecc50585c6eafea568 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:20 -0600 Subject: Wrapper for lower-level revalidate_disk routines. This is a wrapper for the lower-level revalidate_disk call-backs such as sd_revalidate_disk(). It allows us to perform pre and post operations when calling them. We will use this wrapper in a later patch to adjust block device sizes after an online resize (a _post_ operation). Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 21 +++++++++++++++++++++ include/linux/fs.h | 1 + 2 files changed, 22 insertions(+) (limited to 'include/linux/fs.h') diff --git a/fs/block_dev.c b/fs/block_dev.c index c3fa19bd64d..4eeb69a8873 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -852,6 +852,27 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * revalidate_disk - wrapper for lower-level driver's revalidate_disk + * call-back + * + * @disk: struct gendisk to be revalidated + * + * This routine is a wrapper for lower-level driver's revalidate_disk + * call-backs. It is used to do common pre and post operations needed + * for all revalidate_disk operations. + */ +int revalidate_disk(struct gendisk *disk) +{ + int ret = 0; + + if (disk->fops->revalidate_disk) + ret = disk->fops->revalidate_disk(disk); + + return ret; +} +EXPORT_SYMBOL(revalidate_disk); + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This diff --git a/include/linux/fs.h b/include/linux/fs.h index 02a9fb5a830..d63461f9798 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,7 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); -- cgit v1.2.3-70-g09d2 From c3279d1454cdfed02a557d789d8a6d08ab4cbe70 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:25 -0600 Subject: Adjust block device size after an online resize of a disk. The revalidate_disk routine now checks if a disk has been resized by comparing the gendisk capacity to the bdev inode size. If they are different (usually because the disk has been resized underneath the kernel) the bdev inode size is adjusted to match the capacity. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- fs/block_dev.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 39 insertions(+) (limited to 'include/linux/fs.h') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4eeb69a8873..b721955d382 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -852,6 +852,34 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * check_disk_size_change - checks for disk size change and adjusts + * bdev size. + * + * @disk: struct gendisk to check + * @bdev: struct bdev to adjust. + * + * This routine checks to see if the bdev size does not match the disk size + * and adjusts it if it differs. + */ +void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) +{ + loff_t disk_size, bdev_size; + + disk_size = (loff_t)get_capacity(disk) << 9; + bdev_size = i_size_read(bdev->bd_inode); + if (disk_size != bdev_size) { + char name[BDEVNAME_SIZE]; + + disk_name(disk, 0, name); + printk(KERN_INFO + "%s: detected capacity change from %lld to %lld\n", + name, bdev_size, disk_size); + i_size_write(bdev->bd_inode, disk_size); + } +} +EXPORT_SYMBOL(check_disk_size_change); + /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk * call-back @@ -864,11 +892,20 @@ EXPORT_SYMBOL(open_by_devnum); */ int revalidate_disk(struct gendisk *disk) { + struct block_device *bdev; int ret = 0; if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); + bdev = bdget_disk(disk, 0); + if (!bdev) + return ret; + + mutex_lock(&bdev->bd_mutex); + check_disk_size_change(disk, bdev); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return ret; } EXPORT_SYMBOL(revalidate_disk); diff --git a/include/linux/fs.h b/include/linux/fs.h index d63461f9798..32477e8872d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,6 +1722,8 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern void check_disk_size_change(struct gendisk *disk, + struct block_device *bdev); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); -- cgit v1.2.3-70-g09d2 From aeb5d727062a0238a2f96c9c380fbd2be4640c6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Sep 2008 15:28:45 -0400 Subject: [PATCH] introduce fmode_t, do annotations Signed-off-by: Al Viro --- block/bsg.c | 7 ++++--- block/cmd-filter.c | 2 +- block/scsi_ioctl.c | 5 +++-- drivers/block/amiflop.c | 4 ++-- drivers/block/ataflop.c | 4 ++-- drivers/block/floppy.c | 4 ++-- drivers/block/paride/pf.c | 2 +- drivers/block/paride/pt.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/swim3.c | 4 ++-- drivers/char/nvram.c | 6 +++--- drivers/ide/ide-floppy_ioctl.c | 2 +- drivers/ide/ide-gd.c | 2 +- drivers/md/dm-ioctl.c | 4 ++-- drivers/md/dm-table.c | 12 ++++++------ drivers/mtd/mtdchar.c | 10 +++++----- drivers/parisc/eisa_eeprom.c | 2 +- fs/block_dev.c | 10 +++++----- fs/fifo.c | 6 +++--- fs/file_table.c | 4 ++-- fs/hostfs/hostfs_kern.c | 5 +++-- fs/locks.c | 3 ++- fs/open.c | 2 +- fs/proc/base.c | 4 ++-- fs/reiserfs/journal.c | 2 +- include/linux/blkdev.h | 3 ++- include/linux/device-mapper.h | 8 ++++---- include/linux/file.h | 4 ++-- include/linux/fs.h | 16 ++++++++-------- include/linux/fsnotify.h | 2 +- include/linux/types.h | 1 + ipc/shm.c | 2 +- sound/core/oss/pcm_oss.c | 2 +- sound/oss/au1550_ac97.c | 2 +- sound/oss/dmasound/dmasound.h | 4 ++-- sound/oss/dmasound/dmasound_atari.c | 4 ++-- sound/oss/dmasound/dmasound_core.c | 10 +++++----- sound/oss/msnd.h | 2 +- sound/oss/sound_config.h | 20 ++++++-------------- sound/oss/swarm_cs4297a.c | 2 +- sound/oss/vwsnd.c | 2 +- 41 files changed, 96 insertions(+), 98 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/bsg.c b/block/bsg.c index 034112bfe1f..2d36b127f38 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -173,7 +173,7 @@ unlock: static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, - int has_write_perm) + fmode_t has_write_perm) { if (hdr->request_len > BLK_MAX_CDB) { rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); @@ -242,7 +242,7 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; @@ -601,7 +601,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) } static int __bsg_write(struct bsg_device *bd, const char __user *buf, - size_t count, ssize_t *bytes_written, int has_write_perm) + size_t count, ssize_t *bytes_written, + fmode_t has_write_perm) { struct bsg_command *bc; struct request *rq; diff --git a/block/cmd-filter.c b/block/cmd-filter.c index e669aed4c6b..504b275e1b9 100644 --- a/block/cmd-filter.c +++ b/block/cmd-filter.c @@ -27,7 +27,7 @@ #include int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, int has_write_perm) + unsigned char *cmd, fmode_t has_write_perm) { /* root can do any command. */ if (capable(CAP_SYS_RAWIO)) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index c34272a348f..c525905f9d3 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -384,7 +384,8 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, struct gendisk *disk, struct scsi_ioctl_command __user *sic) { struct request *rq; - int err, write_perm = 0; + int err; + fmode_t write_perm = 0; unsigned int in_len, out_len, bytes, opcode, cmdlen; char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; @@ -428,7 +429,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, /* scsi_ioctl passes NULL */ if (file && (file->f_mode & FMODE_WRITE)) - write_perm = 1; + write_perm = FMODE_WRITE; err = blk_verify_command(&q->cmd_filter, rq->cmd, write_perm); if (err) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 7516baff3bb..d19c5a939fe 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1560,9 +1560,9 @@ static int floppy_open(struct inode *inode, struct file *filp) if (fd_ref[drive] && old_dev != system) return -EBUSY; - if (filp && filp->f_mode & 3) { + if (filp && filp->f_mode & (FMODE_READ|FMODE_WRITE)) { check_disk_change(inode->i_bdev); - if (filp->f_mode & 2 ) { + if (filp->f_mode & FMODE_WRITE ) { int wrprot; get_fdc(drive); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 432cf401829..e1db285b72c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1826,9 +1826,9 @@ static int floppy_open( struct inode *inode, struct file *filp ) if (filp->f_flags & O_NDELAY) return 0; - if (filp->f_mode & 3) { + if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { check_disk_change(inode->i_bdev); - if (filp->f_mode & 2) { + if (filp->f_mode & FMODE_WRITE) { if (p->wpstat) { if (p->ref < 0) p->ref = 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2cea27aba9a..ae3ef8945f3 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3761,14 +3761,14 @@ static int floppy_open(struct inode *inode, struct file *filp) UFDCS->rawcmd = 2; if (!(filp->f_flags & O_NDELAY)) { - if (filp->f_mode & 3) { + if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { UDRS->last_checked = 0; check_disk_change(inode->i_bdev); if (UTESTF(FD_DISK_CHANGED)) goto out; } res = -EROFS; - if ((filp->f_mode & 2) && !(UTESTF(FD_DISK_WRITABLE))) + if ((filp->f_mode & FMODE_WRITE) && !(UTESTF(FD_DISK_WRITABLE))) goto out; } mutex_unlock(&open_lock); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e7fe6ca97dd..a902d84fd33 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -305,7 +305,7 @@ static int pf_open(struct inode *inode, struct file *file) if (pf->media_status == PF_NM) return -ENODEV; - if ((pf->media_status == PF_RO) && (file->f_mode & 2)) + if ((pf->media_status == PF_RO) && (file->f_mode & FMODE_WRITE)) return -EROFS; pf->access++; diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 5ae229656ea..1e4006e18f0 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -667,7 +667,7 @@ static int pt_open(struct inode *inode, struct file *file) goto out; err = -EROFS; - if ((!(tape->flags & PT_WRITE_OK)) && (file->f_mode & 2)) + if ((!(tape->flags & PT_WRITE_OK)) && (file->f_mode & FMODE_WRITE)) goto out; if (!(iminor(inode) & 128)) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 195ca7c720f..4d581e8ba9f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2320,7 +2320,7 @@ static int pkt_open_write(struct pktcdvd_device *pd) /* * called at open time. */ -static int pkt_open_dev(struct pktcdvd_device *pd, int write) +static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) { int ret; long lba; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 730ccea78e4..a53ca54bee1 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -908,13 +908,13 @@ static int floppy_open(struct inode *inode, struct file *filp) return -EBUSY; if (err == 0 && (filp->f_flags & O_NDELAY) == 0 - && (filp->f_mode & 3)) { + && (filp->f_mode & (FMODE_READ|FMODE_WRITE))) { check_disk_change(inode->i_bdev); if (fs->ejected) err = -ENXIO; } - if (err == 0 && (filp->f_mode & 2)) { + if (err == 0 && (filp->f_mode & FMODE_WRITE)) { if (fs->write_prot < 0) fs->write_prot = swim3_readbit(fs, WRITE_PROT); if (fs->write_prot) diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 39f6357e3b5..8054ee839b3 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -338,7 +338,7 @@ nvram_open(struct inode *inode, struct file *file) if ((nvram_open_cnt && (file->f_flags & O_EXCL)) || (nvram_open_mode & NVRAM_EXCL) || - ((file->f_mode & 2) && (nvram_open_mode & NVRAM_WRITE))) { + ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) { spin_unlock(&nvram_state_lock); unlock_kernel(); return -EBUSY; @@ -346,7 +346,7 @@ nvram_open(struct inode *inode, struct file *file) if (file->f_flags & O_EXCL) nvram_open_mode |= NVRAM_EXCL; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) nvram_open_mode |= NVRAM_WRITE; nvram_open_cnt++; @@ -366,7 +366,7 @@ nvram_release(struct inode *inode, struct file *file) /* if only one instance is open, clear the EXCL bit */ if (nvram_open_mode & NVRAM_EXCL) nvram_open_mode &= ~NVRAM_EXCL; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) nvram_open_mode &= ~NVRAM_WRITE; spin_unlock(&nvram_state_lock); diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 409e4c15f9b..0d5f5054ab6 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -250,7 +250,7 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: return ide_floppy_get_format_capacities(drive, argp); case IDEFLOPPY_IOCTL_FORMAT_START: - if (!(file->f_mode & 2)) + if (!(file->f_mode & FMODE_WRITE)) return -EPERM; return ide_floppy_format_unit(drive, (int __user *)argp); case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index d44898f46c3..d367473098f 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -202,7 +202,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp) goto out_put_idkp; } - if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) { + if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & FMODE_WRITE)) { ret = -EROFS; goto out_put_idkp; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index dca401dc70a..777c948180f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -988,9 +988,9 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size) return r; } -static inline int get_mode(struct dm_ioctl *param) +static inline fmode_t get_mode(struct dm_ioctl *param) { - int mode = FMODE_READ | FMODE_WRITE; + fmode_t mode = FMODE_READ | FMODE_WRITE; if (param->flags & DM_READONLY_FLAG) mode = FMODE_READ; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a740a6950f5..7c8671b06fe 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -43,7 +43,7 @@ struct dm_table { * device. This should be a combination of FMODE_READ * and FMODE_WRITE. */ - int mode; + fmode_t mode; /* a list of devices used by this table */ struct list_head devices; @@ -217,7 +217,7 @@ static int alloc_targets(struct dm_table *t, unsigned int num) return 0; } -int dm_table_create(struct dm_table **result, int mode, +int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md) { struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); @@ -395,7 +395,7 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start, * careful to leave things as they were if we fail to reopen the * device. */ -static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, +static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, struct mapped_device *md) { int r; @@ -421,7 +421,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, */ static int __table_get_device(struct dm_table *t, struct dm_target *ti, const char *path, sector_t start, sector_t len, - int mode, struct dm_dev **result) + fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); @@ -537,7 +537,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) EXPORT_SYMBOL_GPL(dm_set_device_limits); int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, int mode, struct dm_dev **result) + sector_t len, fmode_t mode, struct dm_dev **result) { int r = __table_get_device(ti->table, ti, path, start, len, mode, result); @@ -887,7 +887,7 @@ struct list_head *dm_table_get_devices(struct dm_table *t) return &t->devices; } -int dm_table_get_mode(struct dm_table *t) +fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 963840e9b5b..bcffeda2df3 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -96,7 +96,7 @@ static int mtd_open(struct inode *inode, struct file *file) return -ENODEV; /* You can't open the RO devices RW */ - if ((file->f_mode & 2) && (minor & 1)) + if ((file->f_mode & FMODE_WRITE) && (minor & 1)) return -EACCES; lock_kernel(); @@ -114,7 +114,7 @@ static int mtd_open(struct inode *inode, struct file *file) } /* You can't open it RW if it's not a writeable device */ - if ((file->f_mode & 2) && !(mtd->flags & MTD_WRITEABLE)) { + if ((file->f_mode & FMODE_WRITE) && !(mtd->flags & MTD_WRITEABLE)) { put_mtd_device(mtd); ret = -EACCES; goto out; @@ -144,7 +144,7 @@ static int mtd_close(struct inode *inode, struct file *file) DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n"); /* Only sync if opened RW */ - if ((file->f_mode & 2) && mtd->sync) + if ((file->f_mode & FMODE_WRITE) && mtd->sync) mtd->sync(mtd); put_mtd_device(mtd); @@ -443,7 +443,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, { struct erase_info *erase; - if(!(file->f_mode & 2)) + if(!(file->f_mode & FMODE_WRITE)) return -EPERM; erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL); @@ -497,7 +497,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, struct mtd_oob_buf __user *user_buf = argp; uint32_t retlen; - if(!(file->f_mode & 2)) + if(!(file->f_mode & FMODE_WRITE)) return -EPERM; if (copy_from_user(&buf, argp, sizeof(struct mtd_oob_buf))) diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 5ac207932fd..685d94e69d4 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -86,7 +86,7 @@ static int eisa_eeprom_open(struct inode *inode, struct file *file) { cycle_kernel_lock(); - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) return -EINVAL; return 0; diff --git a/fs/block_dev.c b/fs/block_dev.c index 218408eed1b..8897f3b02e9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk); * to be used for internal purposes. If you ever need it - reconsider * your API. */ -struct block_device *open_by_devnum(dev_t dev, unsigned mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; @@ -975,7 +975,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, +static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part); static int __blkdev_put(struct block_device *bdev, int for_part); @@ -1104,7 +1104,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, +static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part) { /* @@ -1123,7 +1123,7 @@ static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, return do_open(bdev, &fake_file, for_part); } -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) +int blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags) { return __blkdev_get(bdev, mode, flags, 0); } @@ -1315,7 +1315,7 @@ EXPORT_SYMBOL(lookup_bdev); struct block_device *open_bdev_excl(const char *path, int flags, void *holder) { struct block_device *bdev; - mode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); diff --git a/fs/fifo.c b/fs/fifo.c index 987bf941149..f8f97b8b6d4 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp) filp->f_mode &= (FMODE_READ | FMODE_WRITE); switch (filp->f_mode) { - case 1: + case FMODE_READ: /* * O_RDONLY * POSIX.1 says that O_NONBLOCK means return with the FIFO @@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } break; - case 2: + case FMODE_WRITE: /* * O_WRONLY * POSIX.1 says that O_NONBLOCK means return -1 with @@ -98,7 +98,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } break; - case 3: + case FMODE_READ | FMODE_WRITE: /* * O_RDWR * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. diff --git a/fs/file_table.c b/fs/file_table.c index f45a4493f9e..efc06faede6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp); * code should be moved into this function. */ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { struct file *file; struct path; @@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file); * of this should be moving to alloc_file(). */ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { int error = 0; file->f_path.dentry = dentry; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d6ecabf4d23..7f34f4385de 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -20,7 +20,7 @@ struct hostfs_inode_info { char *host_filename; int fd; - int mode; + fmode_t mode; struct inode vfs_inode; }; @@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int hostfs_file_open(struct inode *ino, struct file *file) { char *name; - int mode = 0, r = 0, w = 0, fd; + fmode_t mode = 0; + int r = 0, w = 0, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if ((mode & HOSTFS_I(ino)->mode) == mode) diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd3..20457486d6b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) cmd &= ~LOCK_NB; unlock = (cmd == LOCK_UN); - if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) + if (!unlock && !(cmd & LOCK_MAND) && + !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; error = flock_make_lock(filp, &lock, cmd); diff --git a/fs/open.c b/fs/open.c index 5596049863b..83cdb9dee0c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int error; f->f_flags = flags; - f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { diff --git a/fs/proc/base.c b/fs/proc/base.c index b5918ae8ca7..486cf3fe713 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, file = fcheck_files(files, fd); if (!file) goto out_unlock; - if (file->f_mode & 1) + if (file->f_mode & FMODE_READ) inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) inode->i_mode |= S_IWUSR | S_IXUSR; spin_unlock(&files->file_lock); put_files_struct(files); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c21df71943a..b89d193a00d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super, { int result; dev_t jdev; - int blkdev_mode = FMODE_READ | FMODE_WRITE; + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; char b[BDEVNAME_SIZE]; result = 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4fe68fe3a5..a4413ec3cb3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -910,7 +910,8 @@ static inline int sb_issue_discard(struct super_block *sb, * command filter functions */ extern int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, int has_write_perm); + unsigned char *cmd, fmode_t has_write_perm); +extern void blk_unregister_filter(struct gendisk *disk); extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define MAX_PHYS_SEGMENTS 128 diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 08d783592b7..3f8d4e76367 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -85,7 +85,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); struct dm_dev { struct block_device *bdev; - int mode; + fmode_t mode; char name[16]; }; @@ -95,7 +95,7 @@ struct dm_dev { * FIXME: too many arguments. */ int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, int mode, struct dm_dev **result); + sector_t len, fmode_t mode, struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); /* @@ -223,7 +223,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); /* * First create an empty table. */ -int dm_table_create(struct dm_table **result, int mode, +int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md); /* @@ -254,7 +254,7 @@ void dm_table_put(struct dm_table *t); */ sector_t dm_table_get_size(struct dm_table *t); unsigned int dm_table_get_num_targets(struct dm_table *t); -int dm_table_get_mode(struct dm_table *t); +fmode_t dm_table_get_mode(struct dm_table *t); struct mapped_device *dm_table_get_md(struct dm_table *t); /* diff --git a/include/linux/file.h b/include/linux/file.h index a20259e248a..335a0a5c316 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -19,10 +19,10 @@ struct file_operations; struct vfsmount; struct dentry; extern int init_file(struct file *, struct vfsmount *mnt, - struct dentry *dentry, mode_t mode, + struct dentry *dentry, fmode_t mode, const struct file_operations *fop); extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry, - mode_t mode, const struct file_operations *fop); + fmode_t mode, const struct file_operations *fop); static inline void fput_light(struct file *file, int fput_needed) { diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..60a7a581ba9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -63,18 +63,18 @@ extern int dir_notify_enable; #define MAY_ACCESS 16 #define MAY_OPEN 32 -#define FMODE_READ 1 -#define FMODE_WRITE 2 +#define FMODE_READ ((__force fmode_t)1) +#define FMODE_WRITE ((__force fmode_t)2) /* Internal kernel extensions */ -#define FMODE_LSEEK 4 -#define FMODE_PREAD 8 +#define FMODE_LSEEK ((__force fmode_t)4) +#define FMODE_PREAD ((__force fmode_t)8) #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ /* File is being opened for execution. Primary users of this flag are distributed filesystems that can use it to achieve correct ETXTBUSY behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC 16 +#define FMODE_EXEC ((__force fmode_t)16) #define RW_MASK 1 #define RWA_MASK 2 @@ -825,7 +825,7 @@ struct file { const struct file_operations *f_op; atomic_long_t f_count; unsigned int f_flags; - mode_t f_mode; + fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; @@ -1714,7 +1714,7 @@ extern struct block_device *bdget(dev_t); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, unsigned); +extern struct block_device *open_by_devnum(dev_t, fmode_t); #else static inline void bd_forget(struct inode *inode) {} #endif @@ -1729,7 +1729,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, mode_t, unsigned); +extern int blkdev_get(struct block_device *, fmode_t, unsigned); extern int blkdev_put(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a89513188ce..00fbd5b245c 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -188,7 +188,7 @@ static inline void fsnotify_close(struct file *file) struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; const char *name = dentry->d_name.name; - mode_t mode = file->f_mode; + fmode_t mode = file->f_mode; u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) diff --git a/include/linux/types.h b/include/linux/types.h index f24f7beb47d..1d98330b1f2 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -190,6 +190,7 @@ typedef __u32 __bitwise __wsum; #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; +typedef unsigned __bitwise__ fmode_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; diff --git a/ipc/shm.c b/ipc/shm.c index 0add3fa5f54..867e5d6a55c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -817,7 +817,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) struct ipc_namespace *ns; struct shm_file_data *sfd; struct path path; - mode_t f_mode; + fmode_t f_mode; err = -EINVAL; if (shmid < 0) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 1af62b8b86c..e17836680f4 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2283,7 +2283,7 @@ static int snd_pcm_oss_open_file(struct file *file, int idx, err; struct snd_pcm_oss_file *pcm_oss_file; struct snd_pcm_substream *substream; - unsigned int f_mode = file->f_mode; + fmode_t f_mode = file->f_mode; if (rpcm_oss_file) *rpcm_oss_file = NULL; diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c index 23018a7c063..81e1f443d09 100644 --- a/sound/oss/au1550_ac97.c +++ b/sound/oss/au1550_ac97.c @@ -93,7 +93,7 @@ static struct au1550_state { spinlock_t lock; struct mutex open_mutex; struct mutex sem; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; struct dmabuf { diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h index d978b009656..1cb13fe56ec 100644 --- a/sound/oss/dmasound/dmasound.h +++ b/sound/oss/dmasound/dmasound.h @@ -129,7 +129,7 @@ typedef struct { int (*mixer_ioctl)(u_int, u_long); /* optional */ int (*write_sq_setup)(void); /* optional */ int (*read_sq_setup)(void); /* optional */ - int (*sq_open)(mode_t); /* optional */ + int (*sq_open)(fmode_t); /* optional */ int (*state_info)(char *, size_t); /* optional */ void (*abort_read)(void); /* optional */ int min_dsp_speed; @@ -235,7 +235,7 @@ struct sound_queue { */ int active; wait_queue_head_t action_queue, open_queue, sync_queue; - int open_mode; + fmode_t open_mode; int busy, syncing, xruns, died; }; diff --git a/sound/oss/dmasound/dmasound_atari.c b/sound/oss/dmasound/dmasound_atari.c index 285239d64b8..4d45bd63718 100644 --- a/sound/oss/dmasound/dmasound_atari.c +++ b/sound/oss/dmasound/dmasound_atari.c @@ -143,7 +143,7 @@ static int AtaMixerIoctl(u_int cmd, u_long arg); static int TTMixerIoctl(u_int cmd, u_long arg); static int FalconMixerIoctl(u_int cmd, u_long arg); static int AtaWriteSqSetup(void); -static int AtaSqOpen(mode_t mode); +static int AtaSqOpen(fmode_t mode); static int TTStateInfo(char *buffer, size_t space); static int FalconStateInfo(char *buffer, size_t space); @@ -1461,7 +1461,7 @@ static int AtaWriteSqSetup(void) return 0 ; } -static int AtaSqOpen(mode_t mode) +static int AtaSqOpen(fmode_t mode) { write_sq_ignore_int = 1; return 0 ; diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 95fc5c68175..b8239f3168f 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -212,7 +212,7 @@ static int irq_installed; #endif /* MODULE */ /* control over who can modify resources shared between play/record */ -static mode_t shared_resource_owner; +static fmode_t shared_resource_owner; static int shared_resources_initialised; /* @@ -668,7 +668,7 @@ static inline void sq_init_waitqueue(struct sound_queue *sq) #if 0 /* blocking open() */ static inline void sq_wake_up(struct sound_queue *sq, struct file *file, - mode_t mode) + fmode_t mode) { if (file->f_mode & mode) { sq->busy = 0; /* CHECK: IS THIS OK??? */ @@ -677,7 +677,7 @@ static inline void sq_wake_up(struct sound_queue *sq, struct file *file, } #endif -static int sq_open2(struct sound_queue *sq, struct file *file, mode_t mode, +static int sq_open2(struct sound_queue *sq, struct file *file, fmode_t mode, int numbufs, int bufsize) { int rc = 0; @@ -891,10 +891,10 @@ static int sq_release(struct inode *inode, struct file *file) is the owner - if we have problems. */ -static int shared_resources_are_mine(mode_t md) +static int shared_resources_are_mine(fmode_t md) { if (shared_resource_owner) - return (shared_resource_owner & md ) ; + return (shared_resource_owner & md) != 0; else { shared_resource_owner = md ; return 1 ; diff --git a/sound/oss/msnd.h b/sound/oss/msnd.h index 61b3955481c..c8be47ec2b7 100644 --- a/sound/oss/msnd.h +++ b/sound/oss/msnd.h @@ -211,7 +211,7 @@ typedef struct multisound_dev { /* State variables */ enum { msndClassic, msndPinnacle } type; - mode_t mode; + fmode_t mode; unsigned long flags; #define F_RESETTING 0 #define F_HAVEDIGITAL 1 diff --git a/sound/oss/sound_config.h b/sound/oss/sound_config.h index 1a00a321061..55271fbe7f4 100644 --- a/sound/oss/sound_config.h +++ b/sound/oss/sound_config.h @@ -110,24 +110,16 @@ struct channel_info { #define OPEN_WRITE PCM_ENABLE_OUTPUT #define OPEN_READWRITE (OPEN_READ|OPEN_WRITE) -#if OPEN_READ == FMODE_READ && OPEN_WRITE == FMODE_WRITE - -static inline int translate_mode(struct file *file) -{ - return file->f_mode; -} - -#else - static inline int translate_mode(struct file *file) { - return ((file->f_mode & FMODE_READ) ? OPEN_READ : 0) | - ((file->f_mode & FMODE_WRITE) ? OPEN_WRITE : 0); + if (OPEN_READ == (__force int)FMODE_READ && + OPEN_WRITE == (__force int)FMODE_WRITE) + return (__force int)(file->f_mode & (FMODE_READ | FMODE_WRITE)); + else + return ((file->f_mode & FMODE_READ) ? OPEN_READ : 0) | + ((file->f_mode & FMODE_WRITE) ? OPEN_WRITE : 0); } -#endif - - #include "sound_calls.h" #include "dev_table.h" diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c index 044453a4ee5..41562ecde5b 100644 --- a/sound/oss/swarm_cs4297a.c +++ b/sound/oss/swarm_cs4297a.c @@ -295,7 +295,7 @@ struct cs4297a_state { struct mutex open_mutex; struct mutex open_sem_adc; struct mutex open_sem_dac; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; wait_queue_head_t open_wait_adc; wait_queue_head_t open_wait_dac; diff --git a/sound/oss/vwsnd.c b/sound/oss/vwsnd.c index dcbb3f739e6..78b8acc7c3b 100644 --- a/sound/oss/vwsnd.c +++ b/sound/oss/vwsnd.c @@ -1509,7 +1509,7 @@ typedef struct vwsnd_dev { struct mutex open_mutex; struct mutex io_mutex; struct mutex mix_mutex; - mode_t open_mode; + fmode_t open_mode; wait_queue_head_t open_wait; lithium_t lith; -- cgit v1.2.3-70-g09d2 From 86d434dede14108dd917b25af0f29c0cb28b8d18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2007 19:50:05 -0400 Subject: [PATCH] eliminate use of ->f_flags in block methods store needed information in f_mode Signed-off-by: Al Viro --- drivers/block/ataflop.c | 6 +++--- drivers/block/floppy.c | 15 ++++----------- drivers/block/swim3.c | 6 +++--- drivers/block/ub.c | 2 +- drivers/cdrom/cdrom.c | 4 ++-- drivers/ide/ide-gd.c | 2 +- drivers/scsi/sd.c | 2 +- fs/block_dev.c | 7 +++++++ include/linux/fs.h | 4 ++++ 9 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include/linux/fs.h') diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index e1db285b72c..85d56a26f7c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1813,17 +1813,17 @@ static int floppy_open( struct inode *inode, struct file *filp ) if (p->ref && p->type != type) return -EBUSY; - if (p->ref == -1 || (p->ref && filp->f_flags & O_EXCL)) + if (p->ref == -1 || (p->ref && filp->f_mode & FMODE_EXCL)) return -EBUSY; - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) p->ref = -1; else p->ref++; p->type = type; - if (filp->f_flags & O_NDELAY) + if (filp->f_mode & FMODE_NDELAY) return 0; if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ae3ef8945f3..5d60c05a736 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3453,7 +3453,7 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { -#define FD_IOCTL_ALLOWED ((filp) && (filp)->private_data) +#define FD_IOCTL_ALLOWED ((filp) && (filp)->f_mode & (FMODE_WRITE|FMODE_WRITE_IOCTL)) #define OUT(c,x) case c: outparam = (const char *) (x); break #define IN(c,x,tag) case c: *(x) = inparam. tag ; return 0 @@ -3690,7 +3690,6 @@ static int floppy_open(struct inode *inode, struct file *filp) int res = -EBUSY; char *tmp; - filp->private_data = (void *)0; mutex_lock(&open_lock); old_dev = UDRS->fd_device; if (opened_bdev[drive] && opened_bdev[drive] != inode->i_bdev) @@ -3701,10 +3700,10 @@ static int floppy_open(struct inode *inode, struct file *filp) USETF(FD_VERIFY); } - if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL))) + if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_mode & FMODE_EXCL))) goto out2; - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) UDRS->fd_ref = -1; else UDRS->fd_ref++; @@ -3751,16 +3750,10 @@ static int floppy_open(struct inode *inode, struct file *filp) buffer_track = -1; } - /* Allow ioctls if we have write-permissions even if read-only open. - * Needed so that programs such as fdrawcmd still can work on write - * protected disks */ - if ((filp->f_mode & FMODE_WRITE) || !file_permission(filp, MAY_WRITE)) - filp->private_data = (void *)8; - if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (!(filp->f_flags & O_NDELAY)) { + if (!(filp->f_mode & FMODE_NDELAY)) { if (filp->f_mode & (FMODE_READ|FMODE_WRITE)) { UDRS->last_checked = 0; check_disk_change(inode->i_bdev); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index a53ca54bee1..5c45d5556ae 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -904,10 +904,10 @@ static int floppy_open(struct inode *inode, struct file *filp) swim3_action(fs, SETMFM); swim3_select(fs, RELAX); - } else if (fs->ref_count == -1 || filp->f_flags & O_EXCL) + } else if (fs->ref_count == -1 || filp->f_mode & FMODE_EXCL) return -EBUSY; - if (err == 0 && (filp->f_flags & O_NDELAY) == 0 + if (err == 0 && (filp->f_mode & FMODE_NDELAY) == 0 && (filp->f_mode & (FMODE_READ|FMODE_WRITE))) { check_disk_change(inode->i_bdev); if (fs->ejected) @@ -930,7 +930,7 @@ static int floppy_open(struct inode *inode, struct file *filp) return err; } - if (filp->f_flags & O_EXCL) + if (filp->f_mode & FMODE_EXCL) fs->ref_count = -1; else ++fs->ref_count; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index f60e41833f6..85d41eb67c0 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1691,7 +1691,7 @@ static int ub_bd_open(struct inode *inode, struct file *filp) * under some pretty murky conditions (a failure of READ CAPACITY). * We may need it one day. */ - if (lun->removable && lun->changed && !(filp->f_flags & O_NDELAY)) { + if (lun->removable && lun->changed && !(filp->f_mode & FMODE_NDELAY)) { rc = -ENOMEDIUM; goto err_open; } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d47f2f80acc..4feefa622ae 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -982,7 +982,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp) /* if this was a O_NONBLOCK open and we should honor the flags, * do a quick open without drive/disc integrity checks. */ cdi->use_count++; - if ((fp->f_flags & O_NONBLOCK) && (cdi->options & CDO_USE_FFLAGS)) { + if ((fp->f_mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { ret = cdi->ops->open(cdi, 1); } else { ret = open_for_data(cdi); @@ -1205,7 +1205,7 @@ int cdrom_release(struct cdrom_device_info *cdi, struct file *fp) } opened_for_data = !(cdi->options & CDO_USE_FFLAGS) || - !(fp && fp->f_flags & O_NONBLOCK); + !(fp && fp->f_mode & FMODE_NDELAY); /* * flush cache on last write release diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index d367473098f..66bbb0a22f5 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -197,7 +197,7 @@ static int ide_gd_open(struct inode *inode, struct file *filp) * unreadable disk, so that we can get the format capacity * of the drive or begin the format - Sam */ - if (ret && (filp->f_flags & O_NDELAY) == 0) { + if (ret && (filp->f_mode & FMODE_NDELAY) == 0) { ret = -EIO; goto out_put_idkp; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 7c4d2e68df1..202c1ed9abd 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -640,7 +640,7 @@ static int sd_open(struct inode *inode, struct file *filp) */ retval = -ENOMEDIUM; if (sdev->removable && !sdkp->media_present && - !(filp->f_flags & O_NDELAY)) + !(filp->f_mode & FMODE_NDELAY)) goto error_out; /* diff --git a/fs/block_dev.c b/fs/block_dev.c index 8897f3b02e9..b9022694e9f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1007,6 +1007,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } + if (file->f_flags & O_NDELAY) + file->f_mode |= FMODE_NDELAY; + if (file->f_flags & O_EXCL) + file->f_mode |= FMODE_EXCL; + if ((file->f_flags & O_ACCMODE) == 3) + file->f_mode |= FMODE_WRITE_IOCTL; + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; diff --git a/include/linux/fs.h b/include/linux/fs.h index 60a7a581ba9..5ab5579a516 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -76,6 +76,10 @@ extern int dir_notify_enable; behavior for cross-node execution/opening_for_writing of files */ #define FMODE_EXEC ((__force fmode_t)16) +#define FMODE_NDELAY ((__force fmode_t)32) +#define FMODE_EXCL ((__force fmode_t)64) +#define FMODE_WRITE_IOCTL ((__force fmode_t)128) + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3-70-g09d2 From 08f85851215100d0eebf026810955ee6ad456c38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2007 13:26:20 -0400 Subject: [PATCH] move block_device_operations to blkdev.h Signed-off-by: Al Viro --- fs/ext2/xip.c | 1 + include/linux/blkdev.h | 17 +++++++++++++++++ include/linux/fs.h | 15 +-------------- 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 4fb94c20041..b72b8588422 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ext2.h" #include "xip.h" diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 48f41b991ad..48ec8862a11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,6 +1057,23 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ +struct file; +struct inode; + +struct block_device_operations { + int (*open) (struct inode *, struct file *); + int (*release) (struct inode *, struct file *); + int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); + long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); + long (*compat_ioctl) (struct file *, unsigned, unsigned long); + int (*direct_access) (struct block_device *, sector_t, + void **, unsigned long *); + int (*media_changed) (struct gendisk *); + int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); + struct module *owner; +}; + #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ab5579a516..58bbf689fef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1270,20 +1270,7 @@ int generic_osync_inode(struct inode *, struct address_space *, int); * to have different dirent layouts depending on the binary type. */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); - -struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, - void **, unsigned long *); - int (*media_changed) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); - struct module *owner; -}; +struct block_device_operations; /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl -- cgit v1.2.3-70-g09d2 From d4430d62fa77208824a37fe6f85ab2831d274769 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Mar 2008 09:09:22 -0500 Subject: [PATCH] beginning of methods conversion To keep the size of changesets sane we split the switch by drivers; to keep the damn thing bisectable we do the following: 1) rename the affected methods, add ones with correct prototypes, make (few) callers handle both. That's this changeset. 2) for each driver convert to new methods. *ALL* drivers are converted in this series. 3) kill the old (renamed) methods. Note that it _is_ a flagday; all in-tree drivers are converted and by the end of this series no trace of old methods remain. The only reason why we do that this way is to keep the damn thing bisectable and allow per-driver debugging if anything goes wrong. New methods: open(bdev, mode) release(disk, mode) ioctl(bdev, mode, cmd, arg) /* Called without BKL */ compat_ioctl(bdev, mode, cmd, arg) locked_ioctl(bdev, mode, cmd, arg) /* Called with BKL, legacy */ Signed-off-by: Al Viro --- arch/um/drivers/ubd_kern.c | 6 +++--- block/compat_ioctl.c | 17 +++++++++-------- block/ioctl.c | 35 ++++++++++++++++++++++++++--------- drivers/block/DAC960.c | 2 +- drivers/block/amiflop.c | 6 +++--- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/block/ataflop.c | 6 +++--- drivers/block/brd.c | 2 +- drivers/block/cciss.c | 8 ++++---- drivers/block/cpqarray.c | 6 +++--- drivers/block/floppy.c | 6 +++--- drivers/block/loop.c | 8 ++++---- drivers/block/nbd.c | 2 +- drivers/block/paride/pcd.c | 6 +++--- drivers/block/paride/pd.c | 6 +++--- drivers/block/paride/pf.c | 6 +++--- drivers/block/pktcdvd.c | 6 +++--- drivers/block/swim3.c | 6 +++--- drivers/block/ub.c | 6 +++--- drivers/block/viodasd.c | 4 ++-- drivers/block/virtio_blk.c | 2 +- drivers/block/xd.c | 2 +- drivers/block/xen-blkfront.c | 4 ++-- drivers/block/xsysace.c | 4 ++-- drivers/block/z2ram.c | 4 ++-- drivers/cdrom/gdrom.c | 6 +++--- drivers/cdrom/viocd.c | 6 +++--- drivers/ide/ide-cd.c | 6 +++--- drivers/ide/ide-gd.c | 6 +++--- drivers/ide/ide-tape.c | 6 +++--- drivers/md/dm.c | 6 +++--- drivers/md/md.c | 6 +++--- drivers/memstick/core/mspro_block.c | 4 ++-- drivers/message/i2o/i2o_block.c | 6 +++--- drivers/mmc/card/block.c | 4 ++-- drivers/mtd/mtd_blkdevs.c | 6 +++--- drivers/s390/block/dasd.c | 8 ++++---- drivers/s390/block/dcssblk.c | 4 ++-- drivers/s390/char/tape_block.c | 6 +++--- drivers/scsi/ide-scsi.c | 6 +++--- drivers/scsi/sd.c | 8 ++++---- drivers/scsi/sr.c | 6 +++--- fs/block_dev.c | 18 +++++++++++++++--- include/linux/blkdev.h | 15 ++++++++++----- include/linux/fs.h | 1 + 45 files changed, 167 insertions(+), 131 deletions(-) (limited to 'include/linux/fs.h') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b58fb8941d8..72569cc3cbb 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -108,9 +108,9 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct block_device_operations ubd_blops = { .owner = THIS_MODULE, - .open = ubd_open, - .release = ubd_release, - .ioctl = ubd_ioctl, + .__open = ubd_open, + .__release = ubd_release, + .__ioctl = ubd_ioctl, .getgeo = ubd_getgeo, }; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 1e559fba7bd..576c4fd1546 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -708,17 +708,17 @@ static int compat_blkdev_driver_ioctl(struct inode *inode, struct file *file, return -ENOIOCTLCMD; } - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(file, cmd, arg); + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(file, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->__ioctl) { lock_kernel(); - ret = disk->fops->ioctl(inode, file, cmd, arg); + ret = disk->fops->__ioctl(inode, file, cmd, arg); unlock_kernel(); return ret; } - return -ENOTTY; + return __blkdev_driver_ioctl(inode->i_bdev, file->f_mode, cmd, arg); } static int compat_blkdev_locked_ioctl(struct inode *inode, struct file *file, @@ -805,10 +805,11 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) lock_kernel(); ret = compat_blkdev_locked_ioctl(inode, file, bdev, cmd, arg); - /* FIXME: why do we assume -> compat_ioctl needs the BKL? */ - if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) - ret = disk->fops->compat_ioctl(file, cmd, arg); + if (ret == -ENOIOCTLCMD && disk->fops->__compat_ioctl) + ret = disk->fops->__compat_ioctl(file, cmd, arg); unlock_kernel(); + if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) + ret = disk->fops->compat_ioctl(bdev, file->f_mode, cmd, arg); if (ret != -ENOIOCTLCMD) return ret; diff --git a/block/ioctl.c b/block/ioctl.c index 9a26ace6d04..01ff463bc80 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -269,17 +269,24 @@ int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg) { int ret; - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(file, cmd, arg); + fmode_t mode = 0; + if (file) { + mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + } + + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(file, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->__ioctl) { lock_kernel(); - ret = disk->fops->ioctl(inode, file, cmd, arg); + ret = disk->fops->__ioctl(inode, file, cmd, arg); unlock_kernel(); return ret; } - return -ENOTTY; + return __blkdev_driver_ioctl(inode->i_bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); @@ -295,12 +302,22 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, fake_file.f_path.dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - if (disk->fops->unlocked_ioctl) - return disk->fops->unlocked_ioctl(&fake_file, cmd, arg); + if (disk->fops->__unlocked_ioctl) + return disk->fops->__unlocked_ioctl(&fake_file, cmd, arg); + + if (disk->fops->__ioctl) { + lock_kernel(); + ret = disk->fops->__ioctl(bdev->bd_inode, &fake_file, cmd, arg); + unlock_kernel(); + return ret; + } + + if (disk->fops->ioctl) + return disk->fops->ioctl(bdev, mode, cmd, arg); - if (disk->fops->ioctl) { + if (disk->fops->locked_ioctl) { lock_kernel(); - ret = disk->fops->ioctl(bdev->bd_inode, &fake_file, cmd, arg); + ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg); unlock_kernel(); return ret; } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index a002a381df9..4b90ebfa667 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -153,7 +153,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk) static struct block_device_operations DAC960_BlockDeviceOperations = { .owner = THIS_MODULE, - .open = DAC960_open, + .__open = DAC960_open, .getgeo = DAC960_getgeo, .media_changed = DAC960_media_changed, .revalidate_disk = DAC960_revalidate_disk, diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index d19c5a939fe..d5da4e3cb2a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1648,9 +1648,9 @@ static int amiga_floppy_change(struct gendisk *disk) static struct block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .getgeo = fd_getgeo, .media_changed = amiga_floppy_change, }; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d876ad86123..d4d9796d5dd 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -239,8 +239,8 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) } static struct block_device_operations aoe_bdops = { - .open = aoeblk_open, - .release = aoeblk_release, + .__open = aoeblk_open, + .__release = aoeblk_release, .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 85d56a26f7c..30166774327 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1857,9 +1857,9 @@ static int floppy_release( struct inode * inode, struct file * filp ) static struct block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .media_changed = check_floppy_change, .revalidate_disk= floppy_revalidate, }; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index d070d492e38..2ea99f94766 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -376,7 +376,7 @@ static int brd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations brd_fops = { .owner = THIS_MODULE, - .ioctl = brd_ioctl, + .__ioctl = brd_ioctl, #ifdef CONFIG_BLK_DEV_XIP .direct_access = brd_direct_access, #endif diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d9b1c15b811..781b745181d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -197,12 +197,12 @@ static long cciss_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg); static struct block_device_operations cciss_fops = { .owner = THIS_MODULE, - .open = cciss_open, - .release = cciss_release, - .ioctl = cciss_ioctl, + .__open = cciss_open, + .__release = cciss_release, + .__ioctl = cciss_ioctl, .getgeo = cciss_getgeo, #ifdef CONFIG_COMPAT - .compat_ioctl = cciss_compat_ioctl, + .__compat_ioctl = cciss_compat_ioctl, #endif .revalidate_disk = cciss_revalidate, }; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 3d967525e9a..b71334b968b 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -195,9 +195,9 @@ static inline ctlr_info_t *get_host(struct gendisk *disk) static struct block_device_operations ida_fops = { .owner = THIS_MODULE, - .open = ida_open, - .release = ida_release, - .ioctl = ida_ioctl, + .__open = ida_open, + .__release = ida_release, + .__ioctl = ida_ioctl, .getgeo = ida_getgeo, .revalidate_disk= ida_revalidate, }; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5d60c05a736..72363df5895 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3902,9 +3902,9 @@ static int floppy_revalidate(struct gendisk *disk) static struct block_device_operations floppy_fops = { .owner = THIS_MODULE, - .open = floppy_open, - .release = floppy_release, - .ioctl = fd_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = fd_ioctl, .getgeo = fd_getgeo, .media_changed = check_floppy_change, .revalidate_disk = floppy_revalidate, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d3a25b027ff..6faca2b7ae3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1355,11 +1355,11 @@ static int lo_release(struct inode *inode, struct file *file) static struct block_device_operations lo_fops = { .owner = THIS_MODULE, - .open = lo_open, - .release = lo_release, - .ioctl = lo_ioctl, + .__open = lo_open, + .__release = lo_release, + .__ioctl = lo_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = lo_compat_ioctl, + .__compat_ioctl = lo_compat_ioctl, #endif }; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9034ca585af..36015e0945b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -691,7 +691,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations nbd_fops = { .owner = THIS_MODULE, - .ioctl = nbd_ioctl, + .__ioctl = nbd_ioctl, }; /* diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 8bd557e2a65..6e6dcc1d432 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -252,9 +252,9 @@ static int pcd_block_media_changed(struct gendisk *disk) static struct block_device_operations pcd_bdops = { .owner = THIS_MODULE, - .open = pcd_block_open, - .release = pcd_block_release, - .ioctl = pcd_block_ioctl, + .__open = pcd_block_open, + .__release = pcd_block_release, + .__ioctl = pcd_block_ioctl, .media_changed = pcd_block_media_changed, }; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 5fdfa7c888c..b3023844947 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -807,9 +807,9 @@ static int pd_revalidate(struct gendisk *p) static struct block_device_operations pd_fops = { .owner = THIS_MODULE, - .open = pd_open, - .release = pd_release, - .ioctl = pd_ioctl, + .__open = pd_open, + .__release = pd_release, + .__ioctl = pd_ioctl, .getgeo = pd_getgeo, .media_changed = pd_check_media, .revalidate_disk= pd_revalidate diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index a902d84fd33..e08ca5161ad 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -264,9 +264,9 @@ static char *pf_buf; /* buffer for request in progress */ static struct block_device_operations pf_fops = { .owner = THIS_MODULE, - .open = pf_open, - .release = pf_release, - .ioctl = pf_ioctl, + .__open = pf_open, + .__release = pf_release, + .__ioctl = pf_ioctl, .getgeo = pf_getgeo, .media_changed = pf_check_media, }; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a0ba4023953..33ac8ddf491 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2847,9 +2847,9 @@ static int pkt_media_changed(struct gendisk *disk) static struct block_device_operations pktcdvd_ops = { .owner = THIS_MODULE, - .open = pkt_open, - .release = pkt_close, - .ioctl = pkt_ioctl, + .__open = pkt_open, + .__release = pkt_close, + .__ioctl = pkt_ioctl, .media_changed = pkt_media_changed, }; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 5c45d5556ae..9398af86a7a 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -998,9 +998,9 @@ static int floppy_revalidate(struct gendisk *disk) } static struct block_device_operations floppy_fops = { - .open = floppy_open, - .release = floppy_release, - .ioctl = floppy_ioctl, + .__open = floppy_open, + .__release = floppy_release, + .__ioctl = floppy_ioctl, .media_changed = floppy_check_change, .revalidate_disk= floppy_revalidate, }; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index bc04330f368..5261773407d 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1791,9 +1791,9 @@ static int ub_bd_media_changed(struct gendisk *disk) static struct block_device_operations ub_bd_fops = { .owner = THIS_MODULE, - .open = ub_bd_open, - .release = ub_bd_release, - .ioctl = ub_bd_ioctl, + .__open = ub_bd_open, + .__release = ub_bd_release, + .__ioctl = ub_bd_ioctl, .media_changed = ub_bd_media_changed, .revalidate_disk = ub_bd_revalidate, }; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 1730d29e604..7f7beec29eb 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -221,8 +221,8 @@ static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) */ static struct block_device_operations viodasd_fops = { .owner = THIS_MODULE, - .open = viodasd_open, - .release = viodasd_release, + .__open = viodasd_open, + .__release = viodasd_release, .getgeo = viodasd_getgeo, }; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7643cd16fd6..10f157ea7b0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -180,7 +180,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) } static struct block_device_operations virtblk_fops = { - .ioctl = virtblk_ioctl, + .__ioctl = virtblk_ioctl, .owner = THIS_MODULE, .getgeo = virtblk_getgeo, }; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 624d30f7da3..316fa1da4b9 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -132,7 +132,7 @@ static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct block_device_operations xd_fops = { .owner = THIS_MODULE, - .ioctl = xd_ioctl, + .__ioctl = xd_ioctl, .getgeo = xd_getgeo, }; static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1a50ae70f71..7efac80c8dd 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1041,8 +1041,8 @@ static int blkif_release(struct inode *inode, struct file *filep) static struct block_device_operations xlvbd_block_fops = { .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, + .__open = blkif_open, + .__release = blkif_release, .getgeo = blkif_getgeo, .ioctl = blkif_ioctl, }; diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 4a7a059ebaf..e4efe5b7ec2 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -919,8 +919,8 @@ static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo) static struct block_device_operations ace_fops = { .owner = THIS_MODULE, - .open = ace_open, - .release = ace_release, + .__open = ace_open, + .__release = ace_release, .media_changed = ace_media_changed, .revalidate_disk = ace_revalidate_disk, .getgeo = ace_getgeo, diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index be20a67f1fa..4860d0f3687 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -314,8 +314,8 @@ z2_release( struct inode *inode, struct file *filp ) static struct block_device_operations z2_fops = { .owner = THIS_MODULE, - .open = z2_open, - .release = z2_release, + .__open = z2_open, + .__release = z2_release, }; static struct kobject *z2_find(dev_t dev, int *part, void *data) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 0959edf2afd..ab0c637f58b 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -514,10 +514,10 @@ static int gdrom_bdops_ioctl(struct inode *inode, struct file *file, static struct block_device_operations gdrom_bdops = { .owner = THIS_MODULE, - .open = gdrom_bdops_open, - .release = gdrom_bdops_release, + .__open = gdrom_bdops_open, + .__release = gdrom_bdops_release, .media_changed = gdrom_bdops_mediachanged, - .ioctl = gdrom_bdops_ioctl, + .__ioctl = gdrom_bdops_ioctl, }; static irqreturn_t gdrom_command_interrupt(int irq, void *dev_id) diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index abc4079c3f4..57c2dced3e9 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -180,9 +180,9 @@ static int viocd_blk_media_changed(struct gendisk *disk) struct block_device_operations viocd_fops = { .owner = THIS_MODULE, - .open = viocd_blk_open, - .release = viocd_blk_release, - .ioctl = viocd_blk_ioctl, + .__open = viocd_blk_open, + .__release = viocd_blk_release, + .__ioctl = viocd_blk_ioctl, .media_changed = viocd_blk_media_changed, }; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 87d90200b16..3533984355a 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2200,9 +2200,9 @@ static int idecd_revalidate_disk(struct gendisk *disk) static struct block_device_operations idecd_ops = { .owner = THIS_MODULE, - .open = idecd_open, - .release = idecd_release, - .ioctl = idecd_ioctl, + .__open = idecd_open, + .__release = idecd_release, + .__ioctl = idecd_ioctl, .media_changed = idecd_media_changed, .revalidate_disk = idecd_revalidate_disk }; diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 948af08abe2..d118bbed7cd 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -298,9 +298,9 @@ static int ide_gd_ioctl(struct inode *inode, struct file *file, static struct block_device_operations ide_gd_ops = { .owner = THIS_MODULE, - .open = ide_gd_open, - .release = ide_gd_release, - .ioctl = ide_gd_ioctl, + .__open = ide_gd_open, + .__release = ide_gd_release, + .__ioctl = ide_gd_ioctl, .getgeo = ide_gd_getgeo, .media_changed = ide_gd_media_changed, .revalidate_disk = ide_gd_revalidate_disk diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2b263281ffe..c5df53c4838 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -2376,9 +2376,9 @@ static int idetape_ioctl(struct inode *inode, struct file *file, static struct block_device_operations idetape_block_ops = { .owner = THIS_MODULE, - .open = idetape_open, - .release = idetape_release, - .ioctl = idetape_ioctl, + .__open = idetape_open, + .__release = idetape_release, + .__ioctl = idetape_ioctl, }; static int ide_tape_probe(ide_drive_t *drive) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5f0f4c8bcd3..8b4c92b1b6d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1698,9 +1698,9 @@ int dm_noflush_suspending(struct dm_target *ti) EXPORT_SYMBOL_GPL(dm_noflush_suspending); static struct block_device_operations dm_blk_dops = { - .open = dm_blk_open, - .release = dm_blk_close, - .ioctl = dm_blk_ioctl, + .__open = dm_blk_open, + .__release = dm_blk_close, + .__ioctl = dm_blk_ioctl, .getgeo = dm_blk_getgeo, .owner = THIS_MODULE }; diff --git a/drivers/md/md.c b/drivers/md/md.c index aaa3d465de4..21b04d39ba3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5046,9 +5046,9 @@ static int md_revalidate(struct gendisk *disk) static struct block_device_operations md_fops = { .owner = THIS_MODULE, - .open = md_open, - .release = md_release, - .ioctl = md_ioctl, + .__open = md_open, + .__release = md_release, + .__ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5263913e0c6..fbe5919789d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -237,8 +237,8 @@ static int mspro_block_bd_getgeo(struct block_device *bdev, } static struct block_device_operations ms_block_bdops = { - .open = mspro_block_bd_open, - .release = mspro_block_bd_release, + .__open = mspro_block_bd_open, + .__release = mspro_block_bd_release, .getgeo = mspro_block_bd_getgeo, .owner = THIS_MODULE }; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 81483de8c0f..71500dda8eb 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -931,9 +931,9 @@ static void i2o_block_request_fn(struct request_queue *q) /* I2O Block device operations definition */ static struct block_device_operations i2o_block_fops = { .owner = THIS_MODULE, - .open = i2o_block_open, - .release = i2o_block_release, - .ioctl = i2o_block_ioctl, + .__open = i2o_block_open, + .__release = i2o_block_release, + .__ioctl = i2o_block_ioctl, .getgeo = i2o_block_getgeo, .media_changed = i2o_block_media_changed }; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 24c97d3d16b..8cba06f5e11 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -130,8 +130,8 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) } static struct block_device_operations mmc_bdops = { - .open = mmc_blk_open, - .release = mmc_blk_release, + .__open = mmc_blk_open, + .__release = mmc_blk_release, .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 681d5aca2af..b00d07c5375 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -213,9 +213,9 @@ static int blktrans_ioctl(struct inode *inode, struct file *file, static struct block_device_operations mtd_blktrans_ops = { .owner = THIS_MODULE, - .open = blktrans_open, - .release = blktrans_release, - .ioctl = blktrans_ioctl, + .__open = blktrans_open, + .__release = blktrans_release, + .__ioctl = blktrans_ioctl, .getgeo = blktrans_getgeo, }; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a225ccda02..6bf68e5fe89 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2087,10 +2087,10 @@ static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) struct block_device_operations dasd_device_operations = { .owner = THIS_MODULE, - .open = dasd_open, - .release = dasd_release, - .ioctl = dasd_ioctl, - .compat_ioctl = dasd_compat_ioctl, + .__open = dasd_open, + .__release = dasd_release, + .__ioctl = dasd_ioctl, + .__compat_ioctl = dasd_compat_ioctl, .getgeo = dasd_getgeo, }; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index a7ff167d5b8..413460cc3dd 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -42,8 +42,8 @@ static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; static int dcssblk_major; static struct block_device_operations dcssblk_devops = { .owner = THIS_MODULE, - .open = dcssblk_open, - .release = dcssblk_release, + .__open = dcssblk_open, + .__release = dcssblk_release, .direct_access = dcssblk_direct_access, }; diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index a25b8bf54f4..f1a741c9a6f 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -52,9 +52,9 @@ static int tapeblock_revalidate_disk(struct gendisk *); static struct block_device_operations tapeblock_fops = { .owner = THIS_MODULE, - .open = tapeblock_open, - .release = tapeblock_release, - .ioctl = tapeblock_ioctl, + .__open = tapeblock_open, + .__release = tapeblock_release, + .__ioctl = tapeblock_ioctl, .media_changed = tapeblock_medium_changed, .revalidate_disk = tapeblock_revalidate_disk, }; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 5bcc04e82c2..9069afbad9d 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -483,9 +483,9 @@ static int idescsi_ide_ioctl(struct inode *inode, struct file *file, static struct block_device_operations idescsi_ops = { .owner = THIS_MODULE, - .open = idescsi_ide_open, - .release = idescsi_ide_release, - .ioctl = idescsi_ide_ioctl, + .__open = idescsi_ide_open, + .__release = idescsi_ide_release, + .__ioctl = idescsi_ide_ioctl, }; static int idescsi_slave_configure(struct scsi_device * sdp) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a18528a69d..c8b95e8d285 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -962,12 +962,12 @@ static long sd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long a static struct block_device_operations sd_fops = { .owner = THIS_MODULE, - .open = sd_open, - .release = sd_release, - .ioctl = sd_ioctl, + .__open = sd_open, + .__release = sd_release, + .__ioctl = sd_ioctl, .getgeo = sd_getgeo, #ifdef CONFIG_COMPAT - .compat_ioctl = sd_compat_ioctl, + .__compat_ioctl = sd_compat_ioctl, #endif .media_changed = sd_media_changed, .revalidate_disk = sd_revalidate_disk, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 2fb8d4d2d6f..9446cbf4de8 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -540,9 +540,9 @@ static int sr_block_media_changed(struct gendisk *disk) static struct block_device_operations sr_bdops = { .owner = THIS_MODULE, - .open = sr_block_open, - .release = sr_block_release, - .ioctl = sr_block_ioctl, + .__open = sr_block_open, + .__release = sr_block_release, + .__ioctl = sr_block_ioctl, .media_changed = sr_block_media_changed, /* * No compat_ioctl for now because sr_block_ioctl never diff --git a/fs/block_dev.c b/fs/block_dev.c index b9022694e9f..73b6ce47c86 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,8 +1033,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + if (disk->fops->__open) { + ret = disk->fops->__open(bdev->bd_inode, file); + if (ret) + goto out_first; + } if (disk->fops->open) { - ret = disk->fops->open(bdev->bd_inode, file); + ret = disk->fops->open(bdev, file->f_mode); if (ret) goto out_clear; } @@ -1074,8 +1079,13 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { + if (bdev->bd_disk->fops->__open) { + ret = bdev->bd_disk->fops->__open(bdev->bd_inode, file); + if (ret) + goto out; + } if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); + ret = bdev->bd_disk->fops->open(bdev, file->f_mode); if (ret) goto out_unlock_bdev; } @@ -1184,8 +1194,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part) kill_bdev(bdev); } if (bdev->bd_contains == bdev) { + if (disk->fops->__release) + ret = disk->fops->__release(bd_inode, NULL); if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); + ret = disk->fops->release(disk, 0); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2bad616b994..b573186ff1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1061,11 +1061,16 @@ struct file; struct inode; struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); + int (*__open) (struct inode *, struct file *); + int (*__release) (struct inode *, struct file *); + int (*__ioctl) (struct inode *, struct file *, unsigned, unsigned long); + long (*__unlocked_ioctl) (struct file *, unsigned, unsigned long); + long (*__compat_ioctl) (struct file *, unsigned, unsigned long); + int (*open) (struct block_device *, fmode_t); + int (*release) (struct gendisk *, fmode_t); + int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); int (*media_changed) (struct gendisk *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 58bbf689fef..b5894604ba5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,6 +79,7 @@ extern int dir_notify_enable; #define FMODE_NDELAY ((__force fmode_t)32) #define FMODE_EXCL ((__force fmode_t)64) #define FMODE_WRITE_IOCTL ((__force fmode_t)128) +#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 -- cgit v1.2.3-70-g09d2 From 9a1c3542768b5a58e45a9216921cd10a3bae1205 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Feb 2008 20:40:24 -0500 Subject: [PATCH] pass fmode_t to blkdev_put() Signed-off-by: Al Viro --- drivers/block/pktcdvd.c | 8 ++++---- drivers/char/raw.c | 4 ++-- drivers/md/dm-table.c | 4 ++-- drivers/md/md.c | 4 ++-- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 22 +++++++++++----------- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 4 ++-- fs/jfs/jfs_logmgr.c | 4 ++-- fs/ocfs2/cluster/heartbeat.c | 4 ++-- fs/partitions/check.c | 2 +- fs/reiserfs/journal.c | 4 ++-- include/linux/fs.h | 2 +- kernel/power/swap.c | 8 ++++---- 14 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux/fs.h') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90548da9c1c..ce8c7190192 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2381,7 +2381,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) out_unclaim: bd_release(pd->bdev); out_putdev: - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ); out: return ret; } @@ -2399,7 +2399,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); bd_release(pd->bdev); - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ); pkt_shrink_pktlist(pd); } @@ -2790,7 +2790,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return 0; out_mem: - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return ret; @@ -2975,7 +2975,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - blkdev_put(pd->bdev); + blkdev_put(pd->bdev, FMODE_READ|FMODE_WRITE); remove_proc_entry(pd->name, pkt_proc); DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index e139372d0e6..bfd59e6bf54 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -87,7 +87,7 @@ static int raw_open(struct inode *inode, struct file *filp) out2: bd_release(bdev); out1: - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); out: mutex_unlock(&raw_mutex); return err; @@ -112,7 +112,7 @@ static int raw_release(struct inode *inode, struct file *filp) mutex_unlock(&raw_mutex); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7c8671b06fe..dd8bd2e867c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -357,7 +357,7 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, return PTR_ERR(bdev); r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); if (r) - blkdev_put(bdev); + blkdev_put(bdev, d->dm_dev.mode); else d->dm_dev.bdev = bdev; return r; @@ -372,7 +372,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) return; bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); - blkdev_put(d->dm_dev.bdev); + blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 06ea991c7a4..c1a837ca193 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1520,7 +1520,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) if (err) { printk(KERN_ERR "md: could not bd_claim %s.\n", bdevname(bdev, b)); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return err; } if (!shared) @@ -1536,7 +1536,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) if (!bdev) MD_BUG(); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } void md_autodetect_dev(dev_t dev); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index aee6565aaf9..3c1b6915c9a 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -152,7 +152,7 @@ void dasd_destroy_partitions(struct dasd_block *block) invalidate_partition(block->gdp, 0); /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ); set_capacity(block->gdp, 0); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 55124ac8c7a..05131baf3cf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -977,7 +977,7 @@ EXPORT_SYMBOL(bd_set_size); static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, int for_part); -static int __blkdev_put(struct block_device *bdev, int for_part); +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* * bd_mutex locking: @@ -1095,7 +1095,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, 1); + __blkdev_put(bdev->bd_contains, file->f_mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); @@ -1163,11 +1163,11 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (!(res = bd_claim(bdev, filp))) return 0; - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); return res; } -static int __blkdev_put(struct block_device *bdev, int for_part) +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) { int ret = 0; struct gendisk *disk = bdev->bd_disk; @@ -1184,7 +1184,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) } if (bdev->bd_contains == bdev) { if (disk->fops->release) - ret = disk->fops->release(disk, 0); + ret = disk->fops->release(disk, mode); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1203,13 +1203,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part) mutex_unlock(&bdev->bd_mutex); bdput(bdev); if (victim) - __blkdev_put(victim, 1); + __blkdev_put(victim, mode, 1); return ret; } -int blkdev_put(struct block_device *bdev) +int blkdev_put(struct block_device *bdev, fmode_t mode) { - return __blkdev_put(bdev, 0); + return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1218,7 +1218,7 @@ static int blkdev_close(struct inode * inode, struct file * filp) struct block_device *bdev = I_BDEV(filp->f_mapping->host); if (bdev->bd_holder == filp) bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, filp->f_mode); } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) @@ -1343,7 +1343,7 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder) return bdev; blkdev_put: - blkdev_put(bdev); + blkdev_put(bdev, mode); return ERR_PTR(error); } @@ -1359,7 +1359,7 @@ EXPORT_SYMBOL(open_bdev_excl); void close_bdev_excl(struct block_device *bdev) { bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, 0); /* move up in the next patches */ } EXPORT_SYMBOL(close_bdev_excl); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3a260af5544..15c38e69b69 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ fail: static int ext3_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int ext3_blkdev_remove(struct ext3_sb_info *sbi) @@ -2066,7 +2066,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT3: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9b2b2bc4ec1..c12cf7a657a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -399,7 +399,7 @@ fail: static int ext4_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int ext4_blkdev_remove(struct ext4_sb_info *sbi) @@ -2553,7 +2553,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT4: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cd2ec2988b5..335c4de6552 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1168,7 +1168,7 @@ journal_found: bd_release(bdev); close: /* close external log device */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb) rc = lmLogShutdown(log); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); kfree(log); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 7dce1612553..4b6fdf591ee 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev); + blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); if (reg->hr_slots) kfree(reg->hr_slots); @@ -1358,7 +1358,7 @@ out: iput(inode); if (ret < 0) { if (reg->hr_bdev) { - blkdev_put(reg->hr_bdev); + blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); reg->hr_bdev = NULL; } } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index cfb0c80690a..5a35ff2e1a9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -488,7 +488,7 @@ void register_disk(struct gendisk *disk) err = blkdev_get(bdev, FMODE_READ, 0); if (err < 0) goto exit; - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ); exit: /* announce disk after possible partitions are created */ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b89d193a00d..3261518478f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super, if (journal->j_dev_bd != NULL) { if (journal->j_dev_bd->bd_dev != super->s_dev) bd_release(journal->j_dev_bd); - result = blkdev_put(journal->j_dev_bd); + result = blkdev_put(journal->j_dev_bd, 0); /* move up */ journal->j_dev_bd = NULL; } @@ -2618,7 +2618,7 @@ static int journal_init_dev(struct super_block *super, } else if (jdev != super->s_dev) { result = bd_claim(journal->j_dev_bd, journal); if (result) { - blkdev_put(journal->j_dev_bd); + blkdev_put(journal->j_dev_bd, blkdev_mode); return result; } diff --git a/include/linux/fs.h b/include/linux/fs.h index b5894604ba5..04c8dc41f45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,7 +1722,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t, unsigned); -extern int blkdev_put(struct block_device *); +extern int blkdev_put(struct block_device *, fmode_t); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); #ifdef CONFIG_SYSFS diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 80ccac849e4..7b9d611c110 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -178,7 +178,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */ res = set_blocksize(resume_bdev, PAGE_SIZE); if (res < 0) - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_WRITE); return res; } @@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_READ); if (!error) pr_debug("PM: Image successfully loaded\n"); @@ -609,7 +609,7 @@ int swsusp_check(void) return -EINVAL; } if (error) - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, FMODE_READ); else pr_debug("PM: Signature found, resuming\n"); } else { @@ -633,7 +633,7 @@ void swsusp_close(void) return; } - blkdev_put(resume_bdev); + blkdev_put(resume_bdev, 0); /* move up */ } static int swsusp_header_init(void) -- cgit v1.2.3-70-g09d2 From 30c40d2c01f68c7eb1a41ab3552bdaf5dbf300d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Feb 2008 19:50:45 -0500 Subject: [PATCH] propagate mode through open_bdev_excl/close_bdev_excl replace open_bdev_excl/close_bdev_excl with variants taking fmode_t. superblock gets the value used to mount it stored in sb->s_mode Signed-off-by: Al Viro --- drivers/mtd/devices/block2mtd.c | 4 ++-- fs/block_dev.c | 24 +++++++++++------------- fs/reiserfs/journal.c | 3 ++- fs/super.c | 14 ++++++++++---- fs/xfs/linux-2.6/xfs_super.c | 4 ++-- include/linux/fs.h | 6 ++++-- 6 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include/linux/fs.h') diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 91fbba76763..8c295f40d2a 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - close_bdev_excl(dev->blkdev); + close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); } kfree(dev); @@ -246,7 +246,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) return NULL; /* Get a handle on the device */ - bdev = open_bdev_excl(devname, O_RDWR, NULL); + bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL); #ifndef MODULE if (IS_ERR(bdev)) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 05131baf3cf..4b595904cef 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1309,32 +1309,29 @@ fail: EXPORT_SYMBOL(lookup_bdev); /** - * open_bdev_excl - open a block device by name and set it up for use + * open_bdev_exclusive - open a block device by name and set it up for use * * @path: special file representing the block device - * @flags: %MS_RDONLY for opening read-only + * @mode: FMODE_... combination to pass be used * @holder: owner for exclusion * * Open the blockdevice described by the special file at @path, claim it * for the @holder. */ -struct block_device *open_bdev_excl(const char *path, int flags, void *holder) +struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - fmode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - if (!(flags & MS_RDONLY)) - mode |= FMODE_WRITE; error = blkdev_get(bdev, mode, 0); if (error) return ERR_PTR(error); error = -EACCES; - if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto blkdev_put; error = bd_claim(bdev, holder); if (error) @@ -1347,22 +1344,23 @@ blkdev_put: return ERR_PTR(error); } -EXPORT_SYMBOL(open_bdev_excl); +EXPORT_SYMBOL(open_bdev_exclusive); /** - * close_bdev_excl - release a blockdevice openen by open_bdev_excl() + * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() * * @bdev: blockdevice to close + * @mode: mode, must match that used to open. * - * This is the counterpart to open_bdev_excl(). + * This is the counterpart to open_bdev_exclusive(). */ -void close_bdev_excl(struct block_device *bdev) +void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) { bd_release(bdev); - blkdev_put(bdev, 0); /* move up in the next patches */ + blkdev_put(bdev, mode); } -EXPORT_SYMBOL(close_bdev_excl); +EXPORT_SYMBOL(close_bdev_exclusive); int __invalidate_device(struct block_device *bdev) { diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 3261518478f..70b89607667 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2628,7 +2628,8 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + journal->j_dev_bd = open_bdev_exclusive(jdev_name, + FMODE_READ|FMODE_WRITE, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/super.c b/fs/super.c index e931ae9511f..0d77ac20d03 100644 --- a/fs/super.c +++ b/fs/super.c @@ -760,9 +760,13 @@ int get_sb_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; + fmode_t mode = FMODE_READ; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -785,11 +789,12 @@ int get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; + s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); @@ -807,7 +812,7 @@ int get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); error: return error; } @@ -817,10 +822,11 @@ EXPORT_SYMBOL(get_sb_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + fmode_t mode = sb->s_mode; generic_shutdown_super(sb); sync_blockdev(bdev); - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e39013619b2..37ebe36056e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -589,7 +589,7 @@ xfs_blkdev_get( { int error = 0; - *bdevp = open_bdev_excl(name, 0, mp); + *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); printk("XFS: Invalid device [%s], error=%d\n", name, error); @@ -603,7 +603,7 @@ xfs_blkdev_put( struct block_device *bdev) { if (bdev) - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 04c8dc41f45..c6766314dc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1157,6 +1157,7 @@ struct super_block { char s_id[32]; /* Informational name */ void *s_fs_info; /* Filesystem private info */ + fmode_t s_mode; /* * The next field is for VFS *only*. No filesystems have any business @@ -1753,9 +1754,10 @@ extern void chrdev_show(struct seq_file *,off_t); extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_excl(const char *, int, void *); -extern void close_bdev_excl(struct block_device *); +extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); +extern void close_bdev_exclusive(struct block_device *, fmode_t); extern void blkdev_show(struct seq_file *,off_t); + #else #define BLKDEV_MAJOR_HASH_SIZE 0 #endif -- cgit v1.2.3-70-g09d2 From 572c48921574dbe6dceb958cf965aa962baefde4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Oct 2007 13:24:05 -0400 Subject: [PATCH] sanitize blkdev_get() and friends * get rid of fake struct file/struct dentry in __blkdev_get() * merge __blkdev_get() and do_open() * get rid of flags argument of blkdev_get() Signed-off-by: Al Viro --- drivers/block/pktcdvd.c | 4 +-- drivers/char/raw.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 65 ++++++++++++++--------------------------- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/partitions/check.c | 2 +- include/linux/fs.h | 2 +- kernel/power/swap.c | 2 +- 8 files changed, 30 insertions(+), 51 deletions(-) (limited to 'include/linux/fs.h') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index ce8c7190192..f20bf359b84 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2332,7 +2332,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * so bdget() can't fail. */ bdget(pd->bdev->bd_dev); - if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY))) + if ((ret = blkdev_get(pd->bdev, FMODE_READ))) goto out; if ((ret = bd_claim(pd->bdev, pd))) @@ -2765,7 +2765,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; - ret = blkdev_get(bdev, FMODE_READ, O_RDONLY | O_NONBLOCK); + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); if (ret) return ret; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index bfd59e6bf54..f3cf5eb9b7f 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -65,7 +65,7 @@ static int raw_open(struct inode *inode, struct file *filp) if (!bdev) goto out; igrab(bdev->bd_inode); - err = blkdev_get(bdev, filp->f_mode, 0); + err = blkdev_get(bdev, filp->f_mode); if (err) goto out; err = bd_claim(bdev, raw_open); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 3c1b6915c9a..e99d566b69c 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -99,7 +99,7 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; bdev = bdget_disk(block->gdp, 0); - if (!bdev || blkdev_get(bdev, FMODE_READ, 1) < 0) + if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) return -ENODEV; /* * See fs/partition/check.c:register_disk,rescan_partitions diff --git a/fs/block_dev.c b/fs/block_dev.c index 4b595904cef..b89c956e04f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -844,9 +844,8 @@ struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; if (bdev) - err = blkdev_get(bdev, mode, flags); + err = blkdev_get(bdev, mode); return err ? ERR_PTR(err) : bdev; } @@ -975,8 +974,6 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, - int for_part); static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* @@ -986,7 +983,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); * mutex_lock_nested(whole->bd_mutex, 1) */ -static int do_open(struct block_device *bdev, struct file *file, int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; struct hd_struct *part = NULL; @@ -994,9 +991,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) int partno; int perm = 0; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) perm |= MAY_READ; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) perm |= MAY_WRITE; /* * hooks: /n/, see "layering violations". @@ -1007,15 +1004,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } - if (file->f_flags & O_NDELAY) - file->f_mode |= FMODE_NDELAY; - if (file->f_flags & O_EXCL) - file->f_mode |= FMODE_EXCL; - if ((file->f_flags & O_ACCMODE) == 3) - file->f_mode |= FMODE_WRITE_IOCTL; - ret = -ENXIO; - file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); @@ -1034,7 +1023,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { - ret = disk->fops->open(bdev, file->f_mode); + ret = disk->fops->open(bdev, mode); if (ret) goto out_clear; } @@ -1054,7 +1043,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!whole) goto out_clear; BUG_ON(for_part); - ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); + ret = __blkdev_get(whole, mode, 1); if (ret) goto out_clear; bdev->bd_contains = whole; @@ -1075,7 +1064,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev, file->f_mode); + ret = bdev->bd_disk->fops->open(bdev, mode); if (ret) goto out_unlock_bdev; } @@ -1095,7 +1084,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, file->f_mode, 1); + __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); @@ -1111,28 +1100,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags, - int for_part) -{ - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, for_part); -} - -int blkdev_get(struct block_device *bdev, fmode_t mode, unsigned flags) +int blkdev_get(struct block_device *bdev, fmode_t mode) { - return __blkdev_get(bdev, mode, flags, 0); + return __blkdev_get(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_get); @@ -1149,15 +1119,24 @@ static int blkdev_open(struct inode * inode, struct file * filp) */ filp->f_flags |= O_LARGEFILE; + if (filp->f_flags & O_NDELAY) + filp->f_mode |= FMODE_NDELAY; + if (filp->f_flags & O_EXCL) + filp->f_mode |= FMODE_EXCL; + if ((filp->f_flags & O_ACCMODE) == 3) + filp->f_mode |= FMODE_WRITE_IOCTL; + bdev = bd_acquire(inode); if (bdev == NULL) return -ENOMEM; - res = do_open(bdev, filp, 0); + filp->f_mapping = bdev->bd_inode->i_mapping; + + res = blkdev_get(bdev, filp->f_mode); if (res) return res; - if (!(filp->f_flags & O_EXCL) ) + if (!(filp->f_mode & FMODE_EXCL)) return 0; if (!(res = bd_claim(bdev, filp))) @@ -1327,7 +1306,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h if (IS_ERR(bdev)) return bdev; - error = blkdev_get(bdev, mode, 0); + error = blkdev_get(bdev, mode); if (error) return ERR_PTR(error); error = -EACCES; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4b6fdf591ee..6ebaa58e2c0 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, goto out; reg->hr_bdev = I_BDEV(filp->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); if (ret) { reg->hr_bdev = NULL; goto out; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 5a35ff2e1a9..633f7a0ebb2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -485,7 +485,7 @@ void register_disk(struct gendisk *disk) goto exit; bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ, 0); + err = blkdev_get(bdev, FMODE_READ); if (err < 0) goto exit; blkdev_put(bdev, FMODE_READ); diff --git a/include/linux/fs.h b/include/linux/fs.h index c6766314dc5..cb78e389699 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1722,7 +1722,7 @@ extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, struct gendisk *disk, unsigned cmd, unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, fmode_t, unsigned); +extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 178b001a4f1..b7713b53d07 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -172,7 +172,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */ return res; root_swap = res; - res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR); + res = blkdev_get(resume_bdev, FMODE_WRITE); if (res) return res; -- cgit v1.2.3-70-g09d2 From e436fdae70a31102d2be32969b80fe8545edebd9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Sep 2008 03:38:12 -0400 Subject: [PATCH] get rid of blkdev_driver_ioctl() convert remaining callers to __blkdev_driver_ioctl() Signed-off-by: Al Viro --- block/ioctl.c | 29 ++++++++++------------------- include/linux/fs.h | 3 --- 2 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/ioctl.c b/block/ioctl.c index 0db89f95b15..b4e0abed1b4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -265,21 +265,6 @@ static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev, return -ENOIOCTLCMD; } -int blkdev_driver_ioctl(struct inode *inode, struct file *file, - struct gendisk *disk, unsigned cmd, unsigned long arg) -{ - int ret; - fmode_t mode = 0; - if (file) { - mode = file->f_mode; - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; - } - - return __blkdev_driver_ioctl(inode->i_bdev, mode, cmd, arg); -} -EXPORT_SYMBOL_GPL(blkdev_driver_ioctl); - int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { @@ -315,13 +300,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; int ret, n; + fmode_t mode = 0; + if (file) { + mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + } switch(cmd) { case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); /* -EINVAL to handle old uncorrected drivers */ if (ret != -EINVAL && ret != -ENOTTY) return ret; @@ -333,7 +324,7 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, return 0; case BLKROSET: - ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); /* -EINVAL to handle old uncorrected drivers */ if (ret != -EINVAL && ret != -ENOTTY) return ret; @@ -349,7 +340,7 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, case BLKDISCARD: { uint64_t range[2]; - if (!(file->f_mode & FMODE_WRITE)) + if (!(mode & FMODE_WRITE)) return -EBADF; if (copy_from_user(range, (void __user *)arg, sizeof(range))) @@ -387,6 +378,6 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, if (ret != -ENOIOCTLCMD) return ret; - return blkdev_driver_ioctl(inode, file, disk, cmd, arg); + ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_ioctl); diff --git a/include/linux/fs.h b/include/linux/fs.h index cb78e389699..11de682c65a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1718,9 +1718,6 @@ extern const struct file_operations def_fifo_fops; #ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); -extern int blkdev_driver_ioctl(struct inode *inode, struct file *file, - struct gendisk *disk, unsigned cmd, - unsigned long arg); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); -- cgit v1.2.3-70-g09d2 From 56b26add02b4bdea81d5e0ebda60db1fe3311ad4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 19 Sep 2008 03:17:36 -0400 Subject: [PATCH] kill the rest of struct file propagation in block ioctls Now we can switch blkdev_ioctl() block_device/mode Signed-off-by: Al Viro --- block/compat_ioctl.c | 10 +++++----- block/ioctl.c | 9 +-------- drivers/char/raw.c | 2 +- fs/block_dev.c | 8 ++++++-- include/linux/fs.h | 2 +- 5 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 5b3db0640d8..3098c92402f 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -177,7 +177,7 @@ struct compat_blkpg_ioctl_arg { compat_caddr_t data; }; -static int compat_blkpg_ioctl(struct inode *inode, struct file *file, +static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, struct compat_blkpg_ioctl_arg __user *ua32) { struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a)); @@ -196,7 +196,7 @@ static int compat_blkpg_ioctl(struct inode *inode, struct file *file, if (err) return err; - return blkdev_ioctl(inode, file, cmd, (unsigned long)a); + return blkdev_ioctl(bdev, mode, cmd, (unsigned long)a); } #define BLKBSZGET_32 _IOR(0x12, 112, int) @@ -715,13 +715,13 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: - return blkdev_ioctl(inode, file, cmd, + return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: - return blkdev_ioctl(inode, file, BLKBSZSET, + return blkdev_ioctl(bdev, mode, BLKBSZSET, (unsigned long)compat_ptr(arg)); case BLKPG: - return compat_blkpg_ioctl(inode, file, cmd, compat_ptr(arg)); + return compat_blkpg_ioctl(bdev, mode, cmd, compat_ptr(arg)); case BLKRAGET: case BLKFRAGET: if (!arg) diff --git a/block/ioctl.c b/block/ioctl.c index 14b7f2c1066..c832d639b6e 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -230,20 +230,13 @@ EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); * always keep this in sync with compat_blkdev_ioctl() and * compat_blkdev_locked_ioctl() */ -int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, +int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; struct backing_dev_info *bdi; loff_t size; int ret, n; - fmode_t mode = 0; - if (file) { - mode = file->f_mode; - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; - } switch(cmd) { case BLKFLSBUF: diff --git a/drivers/char/raw.c b/drivers/char/raw.c index f3cf5eb9b7f..96adf28a17e 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -125,7 +125,7 @@ raw_ioctl(struct inode *inode, struct file *filp, { struct block_device *bdev = filp->private_data; - return blkdev_ioctl(bdev->bd_inode, NULL, command, arg); + return blkdev_ioctl(bdev, 0, command, arg); } static void bind_device(struct raw_config_request *rq) diff --git a/fs/block_dev.c b/fs/block_dev.c index b89c956e04f..05865b93f7e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1202,7 +1202,11 @@ static int blkdev_close(struct inode * inode, struct file * filp) static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { - return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); + struct block_device *bdev = I_BDEV(file->f_mapping->host); + fmode_t mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + return blkdev_ioctl(bdev, mode, cmd, arg); } static const struct address_space_operations def_blk_aops = { @@ -1238,7 +1242,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) int res; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); + res = blkdev_ioctl(bdev, 0, cmd, arg); set_fs(old_fs); return res; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 11de682c65a..ff536e106b4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1717,7 +1717,7 @@ extern const struct file_operations bad_sock_fops; extern const struct file_operations def_fifo_fops; #ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); -extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); +extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, fmode_t); extern int blkdev_put(struct block_device *, fmode_t); -- cgit v1.2.3-70-g09d2 From 6de24f0ed08054b2a202902e4d63beff27654db8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 Aug 2008 06:25:49 +0400 Subject: [PATCH 1/2] anondev: init IDR statically Signed-off-by: Alexey Dobriyan --- fs/super.c | 7 +------ include/linux/fs.h | 1 - init/main.c | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/super.c b/fs/super.c index e931ae9511f..dd23bf927fb 100644 --- a/fs/super.c +++ b/fs/super.c @@ -682,7 +682,7 @@ void emergency_remount(void) * filesystems which don't use real block-devices. -- jrs */ -static struct idr unnamed_dev_idr; +static DEFINE_IDR(unnamed_dev_idr); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ int set_anon_super(struct super_block *s, void *data) @@ -726,11 +726,6 @@ void kill_anon_super(struct super_block *sb) EXPORT_SYMBOL(kill_anon_super); -void __init unnamed_dev_init(void) -{ - idr_init(&unnamed_dev_idr); -} - void kill_litter_super(struct super_block *sb) { if (sb->s_root) diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..5f70aa62cf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1593,7 +1593,6 @@ extern int get_sb_pseudo(struct file_system_type *, char *, struct vfsmount *mnt); extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); int __put_super_and_need_restart(struct super_block *sb); -void unnamed_dev_init(void); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ diff --git a/init/main.c b/init/main.c index 3e17a3bafe6..c6a1024a27a 100644 --- a/init/main.c +++ b/init/main.c @@ -670,7 +670,6 @@ asmlinkage void __init start_kernel(void) fork_init(num_physpages); proc_caches_init(); buffer_init(); - unnamed_dev_init(); key_init(); security_init(); vfs_caches_init(num_physpages); -- cgit v1.2.3-70-g09d2 From f696a3659fc4b3a3bf4bc83d9dbec5e5a2ffd929 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 31 Jul 2008 13:41:58 +0200 Subject: [PATCH] move executable checking into ->permission() For execute permission on a regular files we need to check if file has any execute bits at all, regardless of capabilites. This check is normally performed by generic_permission() but was also added to the case when the filesystem defines its own ->permission() method. In the latter case the filesystem should be responsible for performing this check. Move the check from inode_permission() inside filesystems which are not calling generic_permission(). Create a helper function execute_ok() that returns true if the inode is a directory or if any execute bits are present in i_mode. Also fix up the following code: - coda control file is never executable - sysctl files are never executable - hfs_permission seems broken on MAY_EXEC, remove - hfsplus_permission is eqivalent to generic_permission(), remove Signed-off-by: Miklos Szeredi --- fs/cifs/cifsfs.c | 9 ++++++--- fs/coda/dir.c | 3 +++ fs/coda/pioctl.c | 2 +- fs/hfs/inode.c | 8 -------- fs/hfsplus/inode.c | 13 ------------- fs/namei.c | 21 ++++----------------- fs/nfs/dir.c | 3 +++ fs/proc/proc_sysctl.c | 10 ++++++++-- include/linux/fs.h | 5 +++++ 9 files changed, 30 insertions(+), 44 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 89c64a8dcb9..84cc011a16e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask) cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) - return 0; - else /* file mode might have been restricted at mount time + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + else + return 0; + } else /* file mode might have been restricted at mount time on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index c5916228243..75b1fa90b2c 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask) if (!mask) return 0; + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + lock_kernel(); if (coda_cache_check(inode, mask)) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c51365422aa..773f2ce9aa0 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = { /* the coda pioctl inode ops */ static int coda_ioctl_permission(struct inode *inode, int mask) { - return 0; + return (mask & MAY_EXEC) ? -EACCES : 0; } static int coda_pioctl(struct inode * inode, struct file * filp, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 7e19835efa2..c69b7ac75bf 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask) -{ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) - return 0; - return generic_permission(inode, mask, NULL); -} - static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) @@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, .truncate = hfs_file_truncate, .setattr = hfs_inode_setattr, - .permission = hfs_permission, .setxattr = hfs_setxattr, .getxattr = hfs_getxattr, .listxattr = hfs_listxattr, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 963be644297..b207f0e6fc2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,18 +238,6 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask) -{ - /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, - * open_exec has the same test, so it's still not executable, if a x bit - * is set fall back to standard permission check. - */ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) - return 0; - return generic_permission(inode, mask, NULL); -} - - static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) @@ -281,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, - .permission = hfsplus_permission, .setxattr = hfsplus_setxattr, .getxattr = hfsplus_getxattr, .listxattr = hfsplus_listxattr, diff --git a/fs/namei.c b/fs/namei.c index 9e2a534383d..09ce58e49e7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask, * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. */ - if (!(mask & MAY_EXEC) || - (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) + if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; @@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask) } /* Ordinary permission routines do not understand MAY_APPEND. */ - if (inode->i_op && inode->i_op->permission) { + if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); - if (!retval) { - /* - * Exec permission on a regular file is denied if none - * of the execute bits are set. - * - * This check should be done by the ->permission() - * method. - */ - if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && - !(inode->i_mode & S_IXUGO)) - return -EACCES; - } - } else { + else retval = generic_permission(inode, mask, NULL); - } + if (retval) return retval; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c216c8786c5..3e64b98f3a9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1957,6 +1957,9 @@ force_lookup: } else res = PTR_ERR(cred); out: + if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) + res = -EACCES; + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); return res; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5fe210c0917..7b997754a25 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask) * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *head; + struct ctl_table *table; int error; + /* Executable files are not allowed under /proc/sys/ */ + if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) + return -EACCES; + + head = grab_header(inode); if (IS_ERR(head)) return PTR_ERR(head); + table = PROC_I(inode)->sysctl_entry; if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? -EACCES : 0; else /* Use the permissions on the sysctl table entry */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5f70aa62cf0..025a4a251b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1851,6 +1851,11 @@ extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int, int (*check_acl)(struct inode *, int)); +static inline bool execute_ok(struct inode *inode) +{ + return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); +} + extern int get_write_access(struct inode *); extern int deny_write_access(struct file *); static inline void put_write_access(struct inode * inode) -- cgit v1.2.3-70-g09d2 From 08b9fe6b12d32324f311c46b88102b6b9067d434 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 13 Oct 2008 00:09:50 -0400 Subject: [PATCH] i_version: remount support Add support for remounting a filesystem with the i_version option. Signed-off-by: Mimi Zohar --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 025a4a251b6..7d719c1a18e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -136,7 +136,7 @@ extern int dir_notify_enable; /* * Superblock flags that can be altered by MS_REMOUNT */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK) +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) /* * Old magic mount flag and mask -- cgit v1.2.3-70-g09d2 From d8ba7a363393f803c93c8cffabd6d0362618bc2a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 4 Oct 2008 22:34:18 +0400 Subject: proc: move rest of /proc/locks to fs/locks.c Signed-off-by: Alexey Dobriyan --- fs/locks.c | 22 +++++++++++++++++++++- fs/proc/proc_misc.c | 17 ----------------- include/linux/fs.h | 1 - 3 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/linux/fs.h') diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd3..90e87f57b33 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2078,6 +2078,7 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) EXPORT_SYMBOL_GPL(vfs_cancel_lock); #ifdef CONFIG_PROC_FS +#include #include static void lock_get_status(struct seq_file *f, struct file_lock *fl, @@ -2183,12 +2184,31 @@ static void locks_stop(struct seq_file *f, void *v) unlock_kernel(); } -struct seq_operations locks_seq_operations = { +static const struct seq_operations locks_seq_operations = { .start = locks_start, .next = locks_next, .stop = locks_stop, .show = locks_show, }; + +static int locks_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &locks_seq_operations); +} + +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_locks_init(void) +{ + proc_create("locks", 0, NULL, &proc_locks_operations); + return 0; +} +module_init(proc_locks_init); #endif /** diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index fcac25edaef..fea7d658fff 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -453,20 +453,6 @@ static const struct file_operations proc_interrupts_operations = { .release = seq_release, }; -#ifdef CONFIG_FILE_LOCKING -static int locks_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &locks_seq_operations); -} - -static const struct file_operations proc_locks_operations = { - .open = locks_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* CONFIG_FILE_LOCKING */ - #ifdef CONFIG_PROC_PAGE_MONITOR #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) @@ -605,9 +591,6 @@ void __init proc_misc_init(void) proc_symlink("mounts", NULL, "self/mounts"); /* And now for trickier ones */ -#ifdef CONFIG_FILE_LOCKING - proc_create("locks", 0, NULL, &proc_locks_operations); -#endif proc_create("devices", 0, NULL, &proc_devinfo_operations); proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index a6a625be13f..024049543ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1037,7 +1037,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern struct seq_operations locks_seq_operations; #else /* !CONFIG_FILE_LOCKING */ #define fcntl_getlk(a, b) ({ -EINVAL; }) #define fcntl_setlk(a, b, c, d) ({ -EACCES; }) -- cgit v1.2.3-70-g09d2 From 4e02ed4b4a2fae34aae766a5bb93ae235f60adb8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 29 Oct 2008 14:00:55 -0700 Subject: fs: remove prepare_write/commit_write Nothing uses prepare_write or commit_write. Remove them from the tree completely. [akpm@linux-foundation.org: schedule simple_prepare_write() for unexporting] Signed-off-by: Nick Piggin Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 12 +- Documentation/filesystems/vfs.txt | 39 +----- drivers/block/loop.c | 5 +- fs/fat/inode.c | 2 +- fs/libfs.c | 2 +- fs/ocfs2/file.c | 3 +- fs/splice.c | 4 +- include/linux/fs.h | 7 -- mm/filemap.c | 242 +------------------------------------- 9 files changed, 23 insertions(+), 293 deletions(-) (limited to 'include/linux/fs.h') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8362860e21a..23d2f4460de 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -161,8 +161,12 @@ prototypes: int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); @@ -180,8 +184,6 @@ sync_page: no maybe writepages: no set_page_dirty no no readpages: no -prepare_write: no yes yes -commit_write: no yes yes write_begin: no locks the page yes write_end: no yes, unlocks yes perform_write: no n/a yes @@ -191,7 +193,7 @@ releasepage: no yes direct_IO: no launder_page: no yes - ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() + ->write_begin(), ->write_end(), ->sync_page() and ->readpage() may be called from the request handler (/dev/loop). ->readpage() unlocks the page, either synchronously or via I/O diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c4d348dabe9..5579bda58a6 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -492,7 +492,7 @@ written-back to storage typically in whole pages, however the address_space has finer control of write sizes. The read process essentially only requires 'readpage'. The write -process is more complicated and uses prepare_write/commit_write or +process is more complicated and uses write_begin/write_end or set_page_dirty to write data into the address_space, and writepage, sync_page, and writepages to writeback data to storage. @@ -521,8 +521,6 @@ struct address_space_operations { int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -598,37 +596,7 @@ struct address_space_operations { readpages is only used for read-ahead, so read errors are ignored. If anything goes wrong, feel free to give up. - prepare_write: called by the generic write path in VM to set up a write - request for a page. This indicates to the address space that - the given range of bytes is about to be written. The - address_space should check that the write will be able to - complete, by allocating space if necessary and doing any other - internal housekeeping. If the write will update parts of - any basic-blocks on storage, then those blocks should be - pre-read (if they haven't been read already) so that the - updated blocks can be written out properly. - The page will be locked. - - Note: the page _must not_ be marked uptodate in this function - (or anywhere else) unless it actually is uptodate right now. As - soon as a page is marked uptodate, it is possible for a concurrent - read(2) to copy it to userspace. - - commit_write: If prepare_write succeeds, new data will be copied - into the page and then commit_write will be called. It will - typically update the size of the file (if appropriate) and - mark the inode as dirty, and do any other related housekeeping - operations. It should avoid returning an error if possible - - errors should have been handled by prepare_write. - - write_begin: This is intended as a replacement for prepare_write. The - key differences being that: - - it returns a locked page (in *pagep) rather than being - given a pre locked page; - - it must be able to cope with short writes (where the - length passed to write_begin is greater than the number - of bytes copied into the page). - + write_begin: Called by the generic buffered write code to ask the filesystem to prepare to write len bytes at the given offset in the file. The address_space should check that the write will be able to complete, @@ -640,6 +608,9 @@ struct address_space_operations { The filesystem must return the locked pagecache page for the specified offset, in *pagep, for the caller to write into. + It must be able to cope with short writes (where the length passed to + write_begin is greater than the number of bytes copied into the page). + flags is a field for AOP_FLAG_xxx flags, described in include/linux/fs.h. diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f09cd8bcc3..5c4ee70d5cf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -40,8 +40,7 @@ * Heinz Mauelshagen , Feb 2002 * * Support for falling back on the write file operation when the address space - * operations prepare_write and/or commit_write are not available on the - * backing filesystem. + * operations write_begin is not available on the backing filesystem. * Anton Altaparmakov, 16 Feb 2005 * * Still To Fix: @@ -765,7 +764,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, */ if (!file->f_op->splice_read) goto out_putf; - if (aops->prepare_write || aops->write_begin) + if (aops->write_begin) lo_flags |= LO_FLAGS_USE_AOPS; if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 19eafbe3c37..2b2eec1283b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { /* - * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. * * But we must fill the remaining area or hole by nul for diff --git a/fs/libfs.c b/fs/libfs.c index 74688598bcf..e960a832190 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_prepare_write); +EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a7807..7efe937a415 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -679,8 +679,7 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->prepare_write() and - * ->commit_write(). */ + * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { diff --git a/fs/splice.c b/fs/splice.c index a1e701c2715..1abab5cee4b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, }; /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to * be held. Since this may result in an ABBA deadlock with * pipe->inode, we have to order lock acquiry here. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430..0dcdd9458f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -489,13 +489,6 @@ struct address_space_operations { int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - /* - * ext3 requires that a successful prepare_write() call be followed - * by a commit_write() call - they must be balanced - */ - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); - int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/mm/filemap.c b/mm/filemap.c index ab8553658af..f3e5f8944d1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping, { const struct address_space_operations *aops = mapping->a_ops; - if (aops->write_begin) { - return aops->write_begin(file, mapping, pos, len, flags, + return aops->write_begin(file, mapping, pos, len, flags, pagep, fsdata); - } else { - int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - struct page *page; -again: - page = __grab_cache_page(mapping, index); - *pagep = page; - if (!page) - return -ENOMEM; - - if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { - /* - * There is no way to resolve a short write situation - * for a !Uptodate page (except by double copying in - * the caller done by generic_perform_write_2copy). - * - * Instead, we have to bring it uptodate here. - */ - ret = aops->readpage(file, page); - page_cache_release(page); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) - goto again; - return ret; - } - goto again; - } - - ret = aops->prepare_write(file, page, offset, offset+len); - if (ret) { - unlock_page(page); - page_cache_release(page); - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } - return ret; - } } EXPORT_SYMBOL(pagecache_write_begin); @@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { const struct address_space_operations *aops = mapping->a_ops; - int ret; - - if (aops->write_end) { - mark_page_accessed(page); - ret = aops->write_end(file, mapping, pos, len, copied, - page, fsdata); - } else { - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct inode *inode = mapping->host; - - flush_dcache_page(page); - ret = aops->commit_write(file, page, offset, offset+len); - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - - if (ret < 0) { - if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); - } else if (ret > 0) - ret = min_t(size_t, copied, ret); - else - ret = copied; - } - return ret; + mark_page_accessed(page); + return aops->write_end(file, mapping, pos, len, copied, page, fsdata); } EXPORT_SYMBOL(pagecache_write_end); @@ -2226,174 +2163,6 @@ repeat: } EXPORT_SYMBOL(__grab_cache_page); -static ssize_t generic_perform_write_2copy(struct file *file, - struct iov_iter *i, loff_t pos) -{ - struct address_space *mapping = file->f_mapping; - const struct address_space_operations *a_ops = mapping->a_ops; - struct inode *inode = mapping->host; - long status = 0; - ssize_t written = 0; - - do { - struct page *src_page; - struct page *page; - pgoff_t index; /* Pagecache index for current page */ - unsigned long offset; /* Offset into pagecache page */ - unsigned long bytes; /* Bytes to write to page */ - size_t copied; /* Bytes copied from user */ - - offset = (pos & (PAGE_CACHE_SIZE - 1)); - index = pos >> PAGE_CACHE_SHIFT; - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, - iov_iter_count(i)); - - /* - * a non-NULL src_page indicates that we're doing the - * copy via get_user_pages and kmap. - */ - src_page = NULL; - - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. - */ - if (unlikely(iov_iter_fault_in_readable(i, bytes))) { - status = -EFAULT; - break; - } - - page = __grab_cache_page(mapping, index); - if (!page) { - status = -ENOMEM; - break; - } - - /* - * non-uptodate pages cannot cope with short copies, and we - * cannot take a pagefault with the destination page locked. - * So pin the source page to copy it. - */ - if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { - unlock_page(page); - - src_page = alloc_page(GFP_KERNEL); - if (!src_page) { - page_cache_release(page); - status = -ENOMEM; - break; - } - - /* - * Cannot get_user_pages with a page locked for the - * same reason as we can't take a page fault with a - * page locked (as explained below). - */ - copied = iov_iter_copy_from_user(src_page, i, - offset, bytes); - if (unlikely(copied == 0)) { - status = -EFAULT; - page_cache_release(page); - page_cache_release(src_page); - break; - } - bytes = copied; - - lock_page(page); - /* - * Can't handle the page going uptodate here, because - * that means we would use non-atomic usercopies, which - * zero out the tail of the page, which can cause - * zeroes to become transiently visible. We could just - * use a non-zeroing copy, but the APIs aren't too - * consistent. - */ - if (unlikely(!page->mapping || PageUptodate(page))) { - unlock_page(page); - page_cache_release(page); - page_cache_release(src_page); - continue; - } - } - - status = a_ops->prepare_write(file, page, offset, offset+bytes); - if (unlikely(status)) - goto fs_write_aop_error; - - if (!src_page) { - /* - * Must not enter the pagefault handler here, because - * we hold the page lock, so we might recursively - * deadlock on the same lock, or get an ABBA deadlock - * against a different lock, or against the mmap_sem - * (which nests outside the page lock). So increment - * preempt count, and use _atomic usercopies. - * - * The page is uptodate so we are OK to encounter a - * short copy: if unmodified parts of the page are - * marked dirty and written out to disk, it doesn't - * really matter. - */ - pagefault_disable(); - copied = iov_iter_copy_from_user_atomic(page, i, - offset, bytes); - pagefault_enable(); - } else { - void *src, *dst; - src = kmap_atomic(src_page, KM_USER0); - dst = kmap_atomic(page, KM_USER1); - memcpy(dst + offset, src + offset, bytes); - kunmap_atomic(dst, KM_USER1); - kunmap_atomic(src, KM_USER0); - copied = bytes; - } - flush_dcache_page(page); - - status = a_ops->commit_write(file, page, offset, offset+bytes); - if (unlikely(status < 0)) - goto fs_write_aop_error; - if (unlikely(status > 0)) /* filesystem did partial write */ - copied = min_t(size_t, copied, status); - - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - iov_iter_advance(i, copied); - pos += copied; - written += copied; - - balance_dirty_pages_ratelimited(mapping); - cond_resched(); - continue; - -fs_write_aop_error: - unlock_page(page); - page_cache_release(page); - if (src_page) - page_cache_release(src_page); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (pos + bytes > inode->i_size) - vmtruncate(inode, inode->i_size); - break; - } while (iov_iter_count(i)); - - return written ? written : status; -} - static ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { @@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, struct iov_iter i; iov_iter_init(&i, iov, nr_segs, count, written); - if (a_ops->write_begin) - status = generic_perform_write(file, &i, pos); - else - status = generic_perform_write_2copy(file, &i, pos); + status = generic_perform_write(file, &i, pos); if (likely(status >= 0)) { written += status; -- cgit v1.2.3-70-g09d2 From fd4ce1acd0f8558033b1a6968001552bd7671e6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:58:42 +0100 Subject: [PATCH 1/2] kill FMODE_NDELAY_NOW Update FMODE_NDELAY before each ioctl call so that we can kill the magic FMODE_NDELAY_NOW. It would be even better to do this directly in setfl(), but for that we'd need to have FMODE_NDELAY for all files, not just block special files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- block/compat_ioctl.c | 8 +++++++- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 10 +++++++++- include/linux/fs.h | 1 - 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include/linux/fs.h') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index d43e6087bad..67eb93cff69 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -722,8 +722,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct backing_dev_info *bdi; loff_t size; + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; switch (cmd) { case HDIO_GETGEO: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c9e1242eaf2..5081b3981d3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -757,7 +757,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode, * access to the device is prohibited. */ error = scsi_nonblockable_ioctl(sdp, cmd, p, - (mode & FMODE_NDELAY_NOW) != 0); + (mode & FMODE_NDELAY) != 0); if (!scsi_block_when_processing_errors(sdp) || !error) return error; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 62b6633e3a9..45b66b98a51 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -521,7 +521,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, * if it doesn't recognise the ioctl */ ret = scsi_nonblockable_ioctl(sdev, cmd, argp, - (mode & FMODE_NDELAY_NOW) != 0); + (mode & FMODE_NDELAY) != 0); if (ret != -ENODEV) return ret; return scsi_ioctl(sdev, cmd, argp); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7c727523bc5..99e0ae1a4c7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1206,8 +1206,16 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + return blkdev_ioctl(bdev, mode, cmd, arg); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4..b3345a90e11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,7 +79,6 @@ extern int dir_notify_enable; #define FMODE_NDELAY ((__force fmode_t)32) #define FMODE_EXCL ((__force fmode_t)64) #define FMODE_WRITE_IOCTL ((__force fmode_t)128) -#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 -- cgit v1.2.3-70-g09d2 From fc9161e54d0dbf799beff9692ea1cc6237162b85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:58:46 +0100 Subject: [PATCH 2/2] documnt FMODE_ constants Make sure all FMODE_ constants are documents, and ensure a coherent style for the already existing comments. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index b3345a90e11..4a853ef6fd3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -63,21 +63,23 @@ extern int dir_notify_enable; #define MAY_ACCESS 16 #define MAY_OPEN 32 -#define FMODE_READ ((__force fmode_t)1) -#define FMODE_WRITE ((__force fmode_t)2) - -/* Internal kernel extensions */ -#define FMODE_LSEEK ((__force fmode_t)4) -#define FMODE_PREAD ((__force fmode_t)8) -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -/* File is being opened for execution. Primary users of this flag are - distributed filesystems that can use it to achieve correct ETXTBUSY - behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC ((__force fmode_t)16) - -#define FMODE_NDELAY ((__force fmode_t)32) -#define FMODE_EXCL ((__force fmode_t)64) +/* file is open for reading */ +#define FMODE_READ ((__force fmode_t)1) +/* file is open for writing */ +#define FMODE_WRITE ((__force fmode_t)2) +/* file is seekable */ +#define FMODE_LSEEK ((__force fmode_t)4) +/* file can be accessed using pread/pwrite */ +#define FMODE_PREAD ((__force fmode_t)8) +#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* File is opened for execution with sys_execve / sys_uselib */ +#define FMODE_EXEC ((__force fmode_t)16) +/* File is opened with O_NDELAY (only set for block devices) */ +#define FMODE_NDELAY ((__force fmode_t)32) +/* File is opened with O_EXCL (only set for block devices) */ +#define FMODE_EXCL ((__force fmode_t)64) +/* File is opened using open(.., 3, ..) and is writeable only for ioctls + (specialy hack for floppy.c) */ #define FMODE_WRITE_IOCTL ((__force fmode_t)128) #define RW_MASK 1 -- cgit v1.2.3-70-g09d2