diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 10 | ||||
-rw-r--r-- | fs/dlm/dir.c | 7 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 17 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 22 | ||||
-rw-r--r-- | fs/dlm/lowcomms.h | 3 | ||||
-rw-r--r-- | fs/dlm/member.c | 19 | ||||
-rw-r--r-- | fs/dlm/requestqueue.c | 2 | ||||
-rw-r--r-- | fs/eventfd.c | 3 | ||||
-rw-r--r-- | fs/exofs/common.h | 6 | ||||
-rw-r--r-- | fs/exofs/inode.c | 8 | ||||
-rw-r--r-- | fs/exofs/osd.c | 26 | ||||
-rw-r--r-- | fs/fuse/Makefile | 1 | ||||
-rw-r--r-- | fs/fuse/cuse.c | 610 | ||||
-rw-r--r-- | fs/fuse/dev.c | 15 | ||||
-rw-r--r-- | fs/fuse/dir.c | 33 | ||||
-rw-r--r-- | fs/fuse/file.c | 346 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 47 | ||||
-rw-r--r-- | fs/fuse/inode.c | 118 | ||||
-rw-r--r-- | fs/gfs2/Makefile | 1 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 3 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 12 | ||||
-rw-r--r-- | fs/gfs2/log.c | 9 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 3 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 2 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 11 | ||||
-rw-r--r-- | fs/gfs2/super.c | 4 | ||||
-rw-r--r-- | fs/gfs2/trace_gfs2.h | 407 | ||||
-rw-r--r-- | fs/partitions/check.c | 42 |
28 files changed, 1476 insertions, 311 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 9f7270f36b2..525da2e8f73 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -62,6 +62,16 @@ source "fs/autofs/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" +config CUSE + tristate "Character device in Userpace support" + depends on FUSE_FS + help + This FUSE extension allows character devices to be + implemented in userspace. + + If you want to develop or use userspace character device + based on CUSE, answer Y or M. + config GENERIC_ACL bool select FS_POSIX_ACL diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 858fba14aaa..c4dfa1dcc86 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) spin_unlock(&ls->ls_recover_list_lock); if (!found) - de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + len, + ls->ls_allocation); return de; } @@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls) dlm_dir_clear(ls); - last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); + last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); if (!last_name) goto out; @@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, if (namelen > DLM_RESNAME_MAXLEN) return -EINVAL; - de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); if (!de) return -ENOMEM; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index cd8e2df3c29..d489fcc8671 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -384,7 +384,7 @@ static void threads_stop(void) dlm_astd_stop(); } -static int new_lockspace(char *name, int namelen, void **lockspace, +static int new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { struct dlm_ls *ls; @@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace, break; } ls->ls_create_count++; - module_put(THIS_MODULE); - error = 1; /* not an error, return 0 */ + *lockspace = ls; + error = 1; break; } spin_unlock(&lslist_lock); - if (error < 0) - goto out; if (error) - goto ret_zero; + goto out; error = -ENOMEM; @@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace, dlm_create_debug_file(ls); log_debug(ls, "join complete"); - ret_zero: *lockspace = ls; return 0; @@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, return error; } -int dlm_new_lockspace(char *name, int namelen, void **lockspace, +int dlm_new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { int error = 0; @@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, error = new_lockspace(name, namelen, lockspace, flags, lvblen); if (!error) ls_count++; - else if (!ls_count) + if (error > 0) + error = 0; + if (!ls_count) threads_stop(); out: mutex_unlock(&ls_lock); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 609108a8326..cdb580a9c7a 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk) lowcomms_write_space(sk); } +int dlm_lowcomms_connect_node(int nodeid) +{ + struct connection *con; + + if (nodeid == dlm_our_nodeid()) + return 0; + + con = nodeid2con(nodeid, GFP_NOFS); + if (!con) + return -ENOMEM; + lowcomms_connect_sock(con); + return 0; +} + /* Make a socket active */ static int add_sock(struct socket *sock, struct connection *con) { @@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con, return; } - new_con = nodeid2con(nodeid, GFP_KERNEL); + new_con = nodeid2con(nodeid, GFP_NOFS); if (!new_con) return; @@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con) * the same time and the connections cross on the wire. * In this case we store the incoming one in "othercon" */ - newcon = nodeid2con(nodeid, GFP_KERNEL); + newcon = nodeid2con(nodeid, GFP_NOFS); if (!newcon) { result = -ENOMEM; goto accept_err; @@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con) struct connection *othercon = newcon->othercon; if (!othercon) { - othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); + othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); if (!othercon) { log_print("failed to allocate incoming socket"); mutex_unlock(&newcon->sock_mutex); @@ -1421,7 +1435,7 @@ static int work_start(void) static void stop_conn(struct connection *con) { con->flags |= 0x0F; - if (con->sock) + if (con->sock && con->sock->sk) con->sock->sk->sk_user_data = NULL; } diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index a9a9618c0d3..1311e642628 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void); int dlm_lowcomms_close(int nodeid); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); +int dlm_lowcomms_connect_node(int nodeid); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 26133f05ae3..b128775913b 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -17,6 +17,7 @@ #include "recover.h" #include "rcom.h" #include "config.h" +#include "lowcomms.h" static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { @@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) static int dlm_add_member(struct dlm_ls *ls, int nodeid) { struct dlm_member *memb; - int w; + int w, error; - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; @@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) return w; } + error = dlm_lowcomms_connect_node(nodeid); + if (error < 0) { + kfree(memb); + return error; + } + memb->nodeid = nodeid; memb->weight = w; add_ordered_member(ls, memb); @@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls) ls->ls_total_weight = total; - array = kmalloc(sizeof(int) * total, GFP_KERNEL); + array = kmalloc(sizeof(int) * total, ls->ls_allocation); if (!array) return; @@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) continue; log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; memb->nodeid = rv->new[i]; @@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls) int *ids = NULL, *new = NULL; int error, ids_count = 0, new_count = 0; - rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); + rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); if (!rv) return -ENOMEM; diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index daa4183fbb8..7a2307c0891 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) struct rq_entry *e; int length = ms->m_header.h_length - sizeof(struct dlm_message); - e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); + e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); if (!e) { log_print("dlm_add_requestqueue: out of memory len %d", length); return; diff --git a/fs/eventfd.c b/fs/eventfd.c index 2a701d593d3..3f0e1974abd 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -16,6 +16,7 @@ #include <linux/anon_inodes.h> #include <linux/eventfd.h> #include <linux/syscalls.h> +#include <linux/module.h> struct eventfd_ctx { wait_queue_head_t wqh; @@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n) return n; } +EXPORT_SYMBOL_GPL(eventfd_signal); static int eventfd_release(struct inode *inode, struct file *file) { @@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd) return file; } +EXPORT_SYMBOL_GPL(eventfd_fget); SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1512c4bb8c..24667eedc02 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or, int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - #endif /*ifndef __EXOFS_COM_H__*/ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ba8d9fab469..77d0a295eb1 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, struct inode *inode) { struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; - struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; pcol->sbi = sbi; - pcol->req_q = req_q; + pcol->req_q = osd_request_queue(sbi->s_dev); pcol->inode = inode; pcol->expected_pages = expected_pages; @@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync) goto err; } - osd_req_read(or, &obj, pcol->bio, i_start); + osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); if (is_sync) { exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); @@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; - osd_req_write(or, &obj, pcol_copy->bio, i_start); + pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ + osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); if (unlikely(ret)) { EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index 06ca92672eb..b3d2ccb87aa 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) return -EIO; } - -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_read(or, obj, bio, offset); - return 0; -} - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_write(or, obj, bio, offset); - return 0; -} diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 72437065f6a..e95eeb445e5 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_FUSE_FS) += fuse.o +obj-$(CONFIG_CUSE) += cuse.o fuse-objs := dev.o dir.o file.o inode.o control.o diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c new file mode 100644 index 00000000000..de792dcf327 --- /dev/null +++ b/fs/fuse/cuse.c @@ -0,0 +1,610 @@ +/* + * CUSE: Character device in Userspace + * + * Copyright (C) 2008-2009 SUSE Linux Products GmbH + * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + * + * CUSE enables character devices to be implemented from userland much + * like FUSE allows filesystems. On initialization /dev/cuse is + * created. By opening the file and replying to the CUSE_INIT request + * userland CUSE server can create a character device. After that the + * operation is very similar to FUSE. + * + * A CUSE instance involves the following objects. + * + * cuse_conn : contains fuse_conn and serves as bonding structure + * channel : file handle connected to the userland CUSE server + * cdev : the implemented character device + * dev : generic device for cdev + * + * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with + * devices, it's called 'channel' to reduce confusion. + * + * channel determines when the character device dies. When channel is + * closed, everything begins to destruct. The cuse_conn is taken off + * the lookup table preventing further access from cdev, cdev and + * generic device are removed and the base reference of cuse_conn is + * put. + * + * On each open, the matching cuse_conn is looked up and if found an + * additional reference is taken which is released when the file is + * closed. + */ + +#include <linux/fuse.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/kdev_t.h> +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/magic.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/stat.h> + +#include "fuse_i.h" + +#define CUSE_CONNTBL_LEN 64 + +struct cuse_conn { + struct list_head list; /* linked on cuse_conntbl */ + struct fuse_conn fc; /* fuse connection */ + struct cdev *cdev; /* associated character device */ + struct device *dev; /* device representing @cdev */ + + /* init parameters, set once during initialization */ + bool unrestricted_ioctl; +}; + +static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ +static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; +static struct class *cuse_class; + +static struct cuse_conn *fc_to_cc(struct fuse_conn *fc) +{ + return container_of(fc, struct cuse_conn, fc); +} + +static struct list_head *cuse_conntbl_head(dev_t devt) +{ + return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN]; +} + + +/************************************************************************** + * CUSE frontend operations + * + * These are file operations for the character device. + * + * On open, CUSE opens a file from the FUSE mnt and stores it to + * private_data of the open file. All other ops call FUSE ops on the + * FUSE file. + */ + +static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + loff_t pos = 0; + + return fuse_direct_io(file, buf, count, &pos, 0); +} + +static ssize_t cuse_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos = 0; + /* + * No locking or generic_write_checks(), the server is + * responsible for locking and sanity checks. + */ + return fuse_direct_io(file, buf, count, &pos, 1); +} + +static int cuse_open(struct inode *inode, struct file *file) +{ + dev_t devt = inode->i_cdev->dev; + struct cuse_conn *cc = NULL, *pos; + int rc; + + /* look up and get the connection */ + spin_lock(&cuse_lock); + list_for_each_entry(pos, cuse_conntbl_head(devt), list) + if (pos->dev->devt == devt) { + fuse_conn_get(&pos->fc); + cc = pos; + break; + } + spin_unlock(&cuse_lock); + + /* dead? */ + if (!cc) + return -ENODEV; + + /* + * Generic permission check is already done against the chrdev + * file, proceed to open. + */ + rc = fuse_do_open(&cc->fc, 0, file, 0); + if (rc) + fuse_conn_put(&cc->fc); + return rc; +} + +static int cuse_release(struct inode *inode, struct file *file) +{ + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + + fuse_sync_release(ff, file->f_flags); + fuse_conn_put(fc); + + return 0; +} + +static long cuse_file_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = 0; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = FUSE_IOCTL_COMPAT; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static const struct file_operations cuse_frontend_fops = { + .owner = THIS_MODULE, + .read = cuse_read, + .write = cuse_write, + .open = cuse_open, + .release = cuse_release, + .unlocked_ioctl = cuse_file_ioctl, + .compat_ioctl = cuse_file_compat_ioctl, + .poll = fuse_file_poll, +}; + + +/************************************************************************** + * CUSE channel initialization and destruction + */ + +struct cuse_devinfo { + const char *name; +}; + +/** + * cuse_parse_one - parse one key=value pair + * @pp: i/o parameter for the current position + * @end: points to one past the end of the packed string + * @keyp: out parameter for key + * @valp: out parameter for value + * + * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends + * at @end - 1. This function parses one pair and set *@keyp to the + * start of the key and *@valp to the start of the value. Note that + * the original string is modified such that the key string is + * terminated with '\0'. *@pp is updated to point to the next string. + * + * RETURNS: + * 1 on successful parse, 0 on EOF, -errno on failure. + */ +static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) +{ + char *p = *pp; + char *key, *val; + + while (p < end && *p == '\0') + p++; + if (p == end) + return 0; + + if (end[-1] != '\0') { + printk(KERN_ERR "CUSE: info not properly terminated\n"); + return -EINVAL; + } + + key = val = p; + p += strlen(p); + + if (valp) { + strsep(&val, "="); + if (!val) + val = key + strlen(key); + key = strstrip(key); + val = strstrip(val); + } else + key = strstrip(key); + + if (!strlen(key)) { + printk(KERN_ERR "CUSE: zero length info key specified\n"); + return -EINVAL; + } + + *pp = p; + *keyp = key; + if (valp) + *valp = val; + + return 1; +} + +/** + * cuse_parse_dev_info - parse device info + * @p: device info string + * @len: length of device info string + * @devinfo: out parameter for parsed device info + * + * Parse @p to extract device info and store it into @devinfo. String + * pointed to by @p is modified by parsing and @devinfo points into + * them, so @p shouldn't be freed while @devinfo is in use. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) +{ + char *end = p + len; + char *key, *val; + int rc; + + while (true) { + rc = cuse_parse_one(&p, end, &key, &val); + if (rc < 0) + return rc; + if (!rc) + break; + if (strcmp(key, "DEVNAME") == 0) + devinfo->name = val; + else + printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", + key); + } + + if (!devinfo->name || !strlen(devinfo->name)) { + printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); + return -EINVAL; + } + + return 0; +} + +static void cuse_gendev_release(struct device *dev) +{ + kfree(dev); +} + +/** + * cuse_process_init_reply - finish initializing CUSE channel + * + * This function creates the character device and sets up all the + * required data structures for it. Please read the comment at the + * top of this file for high level overview. + */ +static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + struct cuse_conn *cc = fc_to_cc(fc); + struct cuse_init_out *arg = &req->misc.cuse_init_out; + struct page *page = req->pages[0]; + struct cuse_devinfo devinfo = { }; + struct device *dev; + struct cdev *cdev; + dev_t devt; + int rc; + + if (req->out.h.error || + arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { + goto err; + } + + fc->minor = arg->minor; + fc->max_read = max_t(unsigned, arg->max_read, 4096); + fc->max_write = max_t(unsigned, arg->max_write, 4096); + + /* parse init reply */ + cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; + + rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, + &devinfo); + if (rc) + goto err; + + /* determine and reserve devt */ + devt = MKDEV(arg->dev_major, arg->dev_minor); + if (!MAJOR(devt)) + rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name); + else + rc = register_chrdev_region(devt, 1, devinfo.name); + if (rc) { + printk(KERN_ERR "CUSE: failed to register chrdev region\n"); + goto err; + } + + /* devt determined, create device */ + rc = -ENOMEM; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + goto err_region; + + device_initialize(dev); + dev_set_uevent_suppress(dev, 1); + dev->class = cuse_class; + dev->devt = devt; + dev->release = cuse_gendev_release; + dev_set_drvdata(dev, cc); + dev_set_name(dev, "%s", devinfo.name); + + rc = device_add(dev); + if (rc) + goto err_device; + + /* register cdev */ + rc = -ENOMEM; + cdev = cdev_alloc(); + if (!cdev) + goto err_device; + + cdev->owner = THIS_MODULE; + cdev->ops = &cuse_frontend_fops; + + rc = cdev_add(cdev, devt, 1); + if (rc) + goto err_cdev; + + cc->dev = dev; + cc->cdev = cdev; + + /* make the device available */ + spin_lock(&cuse_lock); + list_add(&cc->list, cuse_conntbl_head(devt)); + spin_unlock(&cuse_lock); + + /* announce device availability */ + dev_set_uevent_suppress(dev, 0); + kobject_uevent(&dev->kobj, KOBJ_ADD); +out: + __free_page(page); + return; + +err_cdev: + cdev_del(cdev); +err_device: + put_device(dev); +err_region: + unregister_chrdev_region(devt, 1); +err: + fc->conn_error = 1; + goto out; +} + +static int cuse_send_init(struct cuse_conn *cc) +{ + int rc; + struct fuse_req *req; + struct page *page; + struct fuse_conn *fc = &cc->fc; + struct cuse_init_in *arg; + + BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + rc = PTR_ERR(req); + goto err; + } + + rc = -ENOMEM; + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto err_put_req; + + arg = &req->misc.cuse_init_in; + arg->major = FUSE_KERNEL_VERSION; + arg->minor = FUSE_KERNEL_MINOR_VERSION; + arg->flags |= CUSE_UNRESTRICTED_IOCTL; + req->in.h.opcode = CUSE_INIT; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct cuse_init_in); + req->in.args[0].value = arg; + req->out.numargs = 2; + req->out.args[0].size = sizeof(struct cuse_init_out); + req->out.args[0].value = &req->misc.cuse_init_out; + req->out.args[1].size = CUSE_INIT_INFO_MAX; + req->out.argvar = 1; + req->out.argpages = 1; + req->pages[0] = page; + req->num_pages = 1; + req->end = cuse_process_init_reply; + fuse_request_send_background(fc, req); + + return 0; + +err_put_req: + fuse_put_request(fc, req); +err: + return rc; +} + +static void cuse_fc_release(struct fuse_conn *fc) +{ + struct cuse_conn *cc = fc_to_cc(fc); + kfree(cc); +} + +/** + * cuse_channel_open - open method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being opened + * + * Userland CUSE server can create a CUSE device by opening /dev/cuse + * and replying to the initilaization request kernel sends. This + * function is responsible for handling CUSE device initialization. + * Because the fd opened by this function is used during + * initialization, this function only creates cuse_conn and sends + * init. The rest is delegated to a kthread. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_open(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc; + int rc; + + /* set up cuse_conn */ + cc = kzalloc(sizeof(*cc), GFP_KERNEL); + if (!cc) + return -ENOMEM; + + fuse_conn_init(&cc->fc); + + INIT_LIST_HEAD(&cc->list); + cc->fc.release = cuse_fc_release; + + cc->fc.connected = 1; + cc->fc.blocked = 0; + rc = cuse_send_init(cc); + if (rc) { + fuse_conn_put(&cc->fc); + return rc; + } + file->private_data = &cc->fc; /* channel owns base reference to cc */ + + return 0; +} + +/** + * cuse_channel_release - release method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being closed + * + * Disconnect the channel, deregister CUSE device and initiate + * destruction by putting the default reference. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_release(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc = fc_to_cc(file->private_data); + int rc; + + /* remove from the conntbl, no more access from this point on */ + spin_lock(&cuse_lock); + list_del_init(&cc->list); + spin_unlock(&cuse_lock); + + /* remove device */ + if (cc->dev) + device_unregister(cc->dev); + if (cc->cdev) { + unregister_chrdev_region(cc->cdev->dev, 1); + cdev_del(cc->cdev); + } + + /* kill connection and shutdown channel */ + fuse_conn_kill(&cc->fc); + rc = fuse_dev_release(inode, file); /* puts the base reference */ + + return rc; +} + +static struct file_operations cuse_channel_fops; /* initialized during init */ + + +/************************************************************************** + * Misc stuff and module initializatiion + * + * CUSE exports the same set of attributes to sysfs as fusectl. + */ + +static ssize_t cuse_class_waiting_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); +} + +static ssize_t cuse_class_abort_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + fuse_abort_conn(&cc->fc); + return count; +} + +static struct device_attribute cuse_class_dev_attrs[] = { + __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL), + __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store), + { } +}; + +static struct miscdevice cuse_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cuse", + .fops = &cuse_channel_fops, +}; + +static int __init cuse_init(void) +{ + int i, rc; + + /* init conntbl */ + for (i = 0; i < CUSE_CONNTBL_LEN; i++) + INIT_LIST_HEAD(&cuse_conntbl[i]); + + /* inherit and extend fuse_dev_operations */ + cuse_channel_fops = fuse_dev_operations; + cuse_channel_fops.owner = THIS_MODULE; + cuse_channel_fops.open = cuse_channel_open; + cuse_channel_fops.release = cuse_channel_release; + + cuse_class = class_create(THIS_MODULE, "cuse"); + if (IS_ERR(cuse_class)) + return PTR_ERR(cuse_class); + + cuse_class->dev_attrs = cuse_class_dev_attrs; + + rc = misc_register(&cuse_miscdev); + if (rc) { + class_destroy(cuse_class); + return rc; + } + + return 0; +} + +static void __exit cuse_exit(void) +{ + misc_deregister(&cuse_miscdev); + class_destroy(cuse_class); +} + +module_init(cuse_init); +module_exit(cuse_exit); + +MODULE_AUTHOR("Tejun Heo <tj@kernel.org>"); +MODULE_DESCRIPTION("Character device in Userspace"); +MODULE_LICENSE("GPL"); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ba76b68c52f..8fed2ed12f3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void) fuse_request_init(req); return req; } +EXPORT_SYMBOL_GPL(fuse_request_alloc); struct fuse_req *fuse_request_alloc_nofs(void) { @@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) atomic_dec(&fc->num_waiting); return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(fuse_get_req); /* * Return request in fuse_file->reserved_req. However that may @@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) fuse_request_free(req); } } +EXPORT_SYMBOL_GPL(fuse_put_request); static unsigned len_args(unsigned numargs, struct fuse_arg *args) { @@ -282,7 +285,7 @@ __releases(&fc->lock) wake_up_all(&fc->blocked_waitq); } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && - fc->connected) { + fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); } @@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) @@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->num_background++; if (fc->num_background == FUSE_MAX_BACKGROUND) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { + if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); } @@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) req->isreply = 1; fuse_request_send_nowait(fc, req); } +EXPORT_SYMBOL_GPL(fuse_request_send_background); /* * Called under fc->lock @@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_abort_conn); -static int fuse_dev_release(struct inode *inode, struct file *file) +int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = fuse_get_conn(file); if (fc) { @@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { @@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = { .release = fuse_dev_release, .fasync = fuse_dev_fasync, }; +EXPORT_SYMBOL_GPL(fuse_dev_operations); static struct miscdevice fuse_miscdevice = { .minor = FUSE_MINOR, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8b8eebc5614..b3089a083d3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } /* - * Synchronous release for the case when something goes wrong in CREATE_OPEN - */ -static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, int flags) -{ - fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - fuse_request_send(fc, ff->reserved_req); - fuse_put_request(fc, ff->reserved_req); - kfree(ff); -} - -/* * Atomic create+open operation * * If the filesystem doesn't support this, then fall back to separate @@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, goto out_free_ff; fuse_put_request(fc, req); + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(ff, flags); fuse_send_forget(fc, forget_req, outentry.nodeid, 1); return -ENOMEM; } @@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(ff, flags); return PTR_ERR(file); } - fuse_finish_open(inode, file, ff, &outopen); + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); return 0; out_free_ff: @@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; @@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) static int fuse_dir_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 1); + return fuse_open_common(inode, file, true); } static int fuse_dir_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 1); + fuse_release_common(file, FUSE_RELEASEDIR); + + return 0; } static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06f30e96567..fce6ce694fd 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -12,13 +12,13 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/module.h> static const struct file_operations fuse_direct_io_file_operations; -static int fuse_send_open(struct inode *inode, struct file *file, int isdir, - struct fuse_open_out *outargp) +static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + int opcode, struct fuse_open_out *outargp) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_open_in inarg; struct fuse_req *req; int err; @@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; - req->in.h.nodeid = get_node_id(inode); + req->in.h.opcode = opcode; + req->in.h.nodeid = nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; + ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); - if (ff) { - ff->reserved_req = fuse_request_alloc(); - if (!ff->reserved_req) { - kfree(ff); - return NULL; - } else { - INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); - spin_lock(&fc->lock); - ff->kh = ++fc->khctr; - spin_unlock(&fc->lock); - } - RB_CLEAR_NODE(&ff->polled_node); - init_waitqueue_head(&ff->poll_wait); + if (unlikely(!ff)) + return NULL; + + ff->fc = fc; + ff->reserved_req = fuse_request_alloc(); + if (unlikely(!ff->reserved_req)) { + kfree(ff); + return NULL; } + + INIT_LIST_HEAD(&ff->write_entry); + atomic_set(&ff->count, 0); + RB_CLEAR_NODE(&ff->polled_node); + init_waitqueue_head(&ff->poll_wait); + + spin_lock(&fc->lock); + ff->kh = ++fc->khctr; + spin_unlock(&fc->lock); + return ff; } @@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff) kfree(ff); } -static struct fuse_file *fuse_file_get(struct fuse_file *ff) +struct fuse_file *fuse_file_get(struct fuse_file *ff) { atomic_inc(&ff->count); return ff; @@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - dput(req->misc.release.dentry); - mntput(req->misc.release.vfsmount); + path_put(&req->misc.release.path); } static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - struct inode *inode = req->misc.release.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + req->end = fuse_release_end; - fuse_request_send_background(fc, req); + fuse_request_send_background(ff->fc, req); kfree(ff); } } -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg) +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir) { - if (outarg->open_flags & FOPEN_DIRECT_IO) + struct fuse_open_out outarg; + struct fuse_file *ff; + int err; + int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; + + ff = fuse_file_alloc(fc); + if (!ff) + return -ENOMEM; + + err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + if (err) { + fuse_file_free(ff); + return err; + } + + if (isdir) + outarg.open_flags &= ~FOPEN_DIRECT_IO; + + ff->fh = outarg.fh; + ff->nodeid = nodeid; + ff->open_flags = outarg.open_flags; + file->private_data = fuse_file_get(ff); + + return 0; +} +EXPORT_SYMBOL_GPL(fuse_do_open); + +void fuse_finish_open(struct inode *inode, struct file *file) +{ + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_DIRECT_IO) file->f_op = &fuse_direct_io_file_operations; - if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) + if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (outarg->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); - ff->fh = outarg->fh; - file->private_data = fuse_file_get(ff); } -int fuse_open_common(struct inode *inode, struct file *file, int isdir) +int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_open_out outarg; - struct fuse_file *ff; int err; /* VFS checks this, but only _after_ ->open() */ @@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) if (err) return err; - ff = fuse_file_alloc(fc); - if (!ff) - return -ENOMEM; - - err = fuse_send_open(inode, file, isdir, &outarg); + err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (err) - fuse_file_free(ff); - else { - if (isdir) - outarg.open_flags &= ~FOPEN_DIRECT_IO; - fuse_finish_open(inode, file, ff, &outarg); - } + return err; - return err; + fuse_finish_open(inode, file); + + return 0; } -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) +static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) { + struct fuse_conn *fc = ff->fc; struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release.in; + spin_lock(&fc->lock); + list_del(&ff->write_entry); + if (!RB_EMPTY_NODE(&ff->polled_node)) + rb_erase(&ff->polled_node, &fc->polled_files); + spin_unlock(&fc->lock); + + wake_up_interruptible_sync(&ff->poll_wait); + inarg->fh = ff->fh; inarg->flags = flags; req->in.h.opcode = opcode; - req->in.h.nodeid = nodeid; + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; } -int fuse_release_common(struct inode *inode, struct file *file, int isdir) +void fuse_release_common(struct file *file, int opcode) { - struct fuse_file *ff = file->private_data; - if (ff) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = ff->reserved_req; - - fuse_release_fill(ff, get_node_id(inode), file->f_flags, - isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + struct fuse_file *ff; + struct fuse_req *req; - /* Hold vfsmount and dentry until release is finished */ - req->misc.release.vfsmount = mntget(file->f_path.mnt); - req->misc.release.dentry = dget(file->f_path.dentry); + ff = file->private_data; + if (unlikely(!ff)) + return; - spin_lock(&fc->lock); - list_del(&ff->write_entry); - if (!RB_EMPTY_NODE(&ff->polled_node)) - rb_erase(&ff->polled_node, &fc->polled_files); - spin_unlock(&fc->lock); + req = ff->reserved_req; + fuse_prepare_release(ff, file->f_flags, opcode); - wake_up_interruptible_sync(&ff->poll_wait); - /* - * Normally this will send the RELEASE request, - * however if some asynchronous READ or WRITE requests - * are outstanding, the sending will be delayed - */ - fuse_file_put(ff); - } + /* Hold vfsmount and dentry until release is finished */ + path_get(&file->f_path); + req->misc.release.path = file->f_path; - /* Return value is ignored by VFS */ - return 0; + /* + * Normally this will send the RELEASE request, however if + * some asynchronous READ or WRITE requests are outstanding, + * the sending will be delayed. + */ + fuse_file_put(ff); } static int fuse_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 0); + return fuse_open_common(inode, file, false); } static int fuse_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 0); + fuse_release_common(file, FUSE_RELEASE); + + /* return value is ignored by VFS */ + return 0; +} + +void fuse_sync_release(struct fuse_file *ff, int flags) +{ + WARN_ON(atomic_read(&ff->count) > 1); + fuse_prepare_release(ff, flags, FUSE_RELEASE); + ff->reserved_req->force = 1; + fuse_request_send(ff->fc, ff->reserved_req); + fuse_put_request(ff->fc, ff->reserved_req); + kfree(ff); } +EXPORT_SYMBOL_GPL(fuse_sync_release); /* * Scramble the ID space with XTEA, so that the value of the files_struct @@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) return fuse_fsync_common(file, de, datasync, 0); } -void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode) +void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, + size_t count, int opcode) { struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; @@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, inarg->size = count; inarg->flags = file->f_flags; req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); req->in.args[0].value = inarg; @@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, } static size_t fuse_send_read(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); if (owner != NULL) { struct fuse_read_in *inarg = &req->misc.read.in; @@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - num_read = fuse_send_read(req, file, inode, pos, count, NULL); + num_read = fuse_send_read(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) fuse_file_put(req->ff); } -static void fuse_send_readpages(struct fuse_req *req, struct file *file, - struct inode *inode) +static void fuse_send_readpages(struct fuse_req *req, struct file *file) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; loff_t pos = page_offset(req->pages[0]); size_t count = req->num_pages << PAGE_CACHE_SHIFT; req->out.argpages = 1; req->out.page_zeroing = 1; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { - struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); req->end = fuse_readpages_end; fuse_request_send_background(fc, req); @@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { - fuse_send_readpages(req, data->file, inode); + fuse_send_readpages(req, data->file); data->req = req = fuse_get_req(fc); if (IS_ERR(req)) { unlock_page(page); @@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { if (data.req->num_pages) - fuse_send_readpages(data.req, file, inode); + fuse_send_readpages(data.req, file); else fuse_put_request(fc, data.req); } @@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_read(iocb, iov, nr_segs, pos); } -static void fuse_write_fill(struct fuse_req *req, struct file *file, - struct fuse_file *ff, struct inode *inode, - loff_t pos, size_t count, int writepage) +static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, + loff_t pos, size_t count) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; - memset(inarg, 0, sizeof(struct fuse_write_in)); inarg->fh = ff->fh; inarg->offset = pos; inarg->size = count; - inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; - inarg->flags = file ? file->f_flags : 0; req->in.h.opcode = FUSE_WRITE; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 2; - if (fc->minor < 9) + if (ff->fc->minor < 9) req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; else req->in.args[0].size = sizeof(struct fuse_write_in); @@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, } static size_t fuse_send_write(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); - fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_write_in *inarg = &req->misc.write.in; + + fuse_write_fill(req, ff, pos, count); + inarg->flags = file->f_flags; if (owner != NULL) { - struct fuse_write_in *inarg = &req->misc.write.in; inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } @@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, req->num_pages = 1; req->pages[0] = page; req->page_offset = offset; - nres = fuse_send_write(req, file, inode, pos, count, NULL); + nres = fuse_send_write(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); if (!err && !nres) @@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); - res = fuse_send_write(req, file, inode, pos, count, NULL); + res = fuse_send_write(req, file, pos, count, NULL); offset = req->page_offset; count = res; @@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return 0; } -static ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) { - struct inode *inode = file->f_path.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; struct fuse_req *req; - if (is_bad_inode(inode)) - return -EIO; - req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); while (count) { size_t nres; + fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, &nbytes, write); if (err) { @@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } if (write) - nres = fuse_send_write(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_write(req, file, pos, nbytes, owner); else - nres = fuse_send_read(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_read(req, file, pos, nbytes, owner); + fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) @@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } } fuse_put_request(fc, req); - if (res > 0) { - if (write) - fuse_write_update_size(inode, pos); + if (res > 0) *ppos = pos; - } - fuse_invalidate_attr(inode); return res; } +EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - return fuse_direct_io(file, buf, count, ppos, 0); + ssize_t res; + struct inode *inode = file->f_path.dentry->d_inode; + + if (is_bad_inode(inode)) + return -EIO; + + res = fuse_direct_io(file, buf, count, ppos, 0); + + fuse_invalidate_attr(inode); + + return res; } static ssize_t fuse_direct_write(struct file *file, const char __user *buf, @@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, { struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; + + if (is_bad_inode(inode)) + return -EIO; + /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); res = generic_write_checks(file, ppos, &count, 0); - if (!res) + if (!res) { res = fuse_direct_io(file, buf, count, ppos, 1); + if (res > 0) + fuse_write_update_size(inode, *ppos); + } mutex_unlock(&inode->i_mutex); + + fuse_invalidate_attr(inode); + return res; } @@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page) req->ff = fuse_file_get(ff); spin_unlock(&fc->lock); - fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); + fuse_write_fill(req, ff, page_offset(page), 0); copy_highpage(tmp_page, page); + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; @@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ -static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_ioctl_in inarg = { .fh = ff->fh, .cmd = cmd, @@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* assume all the iovs returned by client always fits in a page */ BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); - if (!fuse_allow_task(fc, current)) - return -EACCES; - - err = -EIO; - if (is_bad_inode(inode)) - goto out; - err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); iov_page = alloc_page(GFP_KERNEL); @@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, return err ? err : outarg.result; } +EXPORT_SYMBOL_GPL(fuse_do_ioctl); + +static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + + if (is_bad_inode(inode)) + return -EIO; + + return fuse_do_ioctl(file, cmd, arg, flags); +} static long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, 0); + return fuse_file_ioctl_common(file, cmd, arg, 0); } static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); + return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } /* @@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc, spin_unlock(&fc->lock); } -static unsigned fuse_file_poll(struct file *file, poll_table *wait) +unsigned fuse_file_poll(struct file *file, poll_table *wait) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_out outarg; struct fuse_req *req; @@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) return PTR_ERR(req); req->in.h.opcode = FUSE_POLL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) } return POLLERR; } +EXPORT_SYMBOL_GPL(fuse_file_poll); /* * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6fc5aedaa0d..aaf2f9ff970 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -97,8 +97,13 @@ struct fuse_inode { struct list_head writepages; }; +struct fuse_conn; + /** FUSE specific file data */ struct fuse_file { + /** Fuse connection for this file */ + struct fuse_conn *fc; + /** Request reserved for flush and release */ struct fuse_req *reserved_req; @@ -108,9 +113,15 @@ struct fuse_file { /** File handle used by userspace */ u64 fh; + /** Node id of this file */ + u64 nodeid; + /** Refcount */ atomic_t count; + /** FOPEN_* flags returned by open */ + u32 open_flags; + /** Entry on inode's write_files list */ struct list_head write_entry; @@ -185,8 +196,6 @@ enum fuse_req_state { FUSE_REQ_FINISHED }; -struct fuse_conn; - /** * A request to the client */ @@ -248,11 +257,12 @@ struct fuse_req { struct fuse_forget_in forget_in; struct { struct fuse_release_in in; - struct vfsmount *vfsmount; - struct dentry *dentry; + struct path path; } release; struct fuse_init_in init_in; struct fuse_init_out init_out; + struct cuse_init_in cuse_init_in; + struct cuse_init_out cuse_init_out; struct { struct fuse_read_in in; u64 attr_ver; @@ -386,6 +396,9 @@ struct fuse_conn { /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; + /** Set if bdi is valid */ + unsigned bdi_initialized:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, * Initialize READ or READDIR request */ void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode); + loff_t pos, size_t count, int opcode); /** * Send OPEN or OPENDIR request */ -int fuse_open_common(struct inode *inode, struct file *file, int isdir); +int fuse_open_common(struct inode *inode, struct file *file, bool isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); +struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg); +void fuse_finish_open(struct inode *inode, struct file *file); -/** Fill in ff->reserved_req with a RELEASE request */ -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode); +void fuse_sync_release(struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request */ -int fuse_release_common(struct inode *inode, struct file *file, int isdir); +void fuse_release_common(struct file *file, int opcode); /** * Send FSYNC or FSYNCDIR request @@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry); */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); +void fuse_conn_kill(struct fuse_conn *fc); + /** * Initialize fuse_conn */ -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); +void fuse_conn_init(struct fuse_conn *fc); /** * Release reference to fuse_conn @@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir); +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write); +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags); +unsigned fuse_file_poll(struct file *file, poll_table *wait); +int fuse_dev_release(struct inode *inode, struct file *file); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 91f7c85f1ff..f0df55a5292 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc) } } -static void fuse_put_super(struct super_block *sb) +static void fuse_bdi_destroy(struct fuse_conn *fc) { - struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->bdi_initialized) + bdi_destroy(&fc->bdi); +} - fuse_send_destroy(fc); +void fuse_conn_kill(struct fuse_conn *fc) +{ spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; @@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb) list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); +} +EXPORT_SYMBOL_GPL(fuse_conn_kill); + +static void fuse_put_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + fuse_send_destroy(fc); + fuse_conn_kill(fc); fuse_conn_put(fc); } @@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) +void fuse_conn_init(struct fuse_conn *fc) { - int err; - memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); @@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; - fc->bdi.unplug_io_fn = default_unplug_io_fn; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->dev = sb->s_dev; - err = bdi_init(&fc->bdi); - if (err) - goto error_mutex_destroy; - if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); - } - if (err) - goto error_bdi_destroy; - /* - * For a single fuse filesystem use max 1% of dirty + - * writeback threshold. - * - * This gives about 1M of write buffer for memory maps on a - * machine with 1G and 10% dirty_ratio, which should be more - * than enough. - * - * Privileged users can raise it by writing to - * - * /sys/class/bdi/<bdi>/max_ratio - */ - bdi_set_max_ratio(&fc->bdi, 1); fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); - - return 0; - - error_bdi_destroy: - bdi_destroy(&fc->bdi); - error_mutex_destroy: - mutex_destroy(&fc->inst_mutex); - return err; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc) fc->release(fc); } } +EXPORT_SYMBOL_GPL(fuse_conn_put); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) { atomic_inc(&fc->count); return fc; } +EXPORT_SYMBOL_GPL(fuse_conn_get); static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) { @@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc) kfree(fc); } +static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) +{ + int err; + + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + + err = bdi_init(&fc->bdi); + if (err) + return err; + + fc->bdi_initialized = 1; + + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } + + if (err) + return err; + + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi/<bdi>/max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); + + return 0; +} + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; @@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!fc) goto err_fput; - err = fuse_conn_init(fc, sb); - if (err) { - kfree(fc); - goto err_fput; - } + fuse_conn_init(fc); + + fc->dev = sb->s_dev; + err = fuse_bdi_init(fc, sb); + if (err) + goto err_put_conn; fc->release = fuse_free_conn; fc->flags = d.flags; @@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); fuse_conn_put(fc); err_fput: fput(file); diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index d53a9bea1c2..3da2f1f4f73 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,3 +1,4 @@ +EXTRA_CFLAGS := -I$(src) obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 329763530dc..6d47379e794 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -25,6 +25,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); clear_buffer_boundary(bh_map); + trace_gfs2_bmap(ip, bh_map, lblock, create, 1); if (gfs2_is_dir(ip)) { bsize = sdp->sd_jbsize; arr = sdp->sd_jheightsize; @@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, ret = 0; out: release_metapath(&mp); + trace_gfs2_bmap(ip, bh_map, lblock, create, ret); bmap_unlock(ip, create); return ret; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2bf62bcc518..297421c0427 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -39,6 +39,8 @@ #include "super.h" #include "util.h" #include "bmap.h" +#define CREATE_TRACE_POINTS +#include "trace_gfs2.h" struct gfs2_gl_hash_bucket { struct hlist_head hb_list; @@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl) if (aspace) gfs2_aspace_put(aspace); - + trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); } @@ -317,14 +319,17 @@ restart: return 2; gh->gh_error = ret; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 1); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 0); gfs2_holder_wake(gh); continue; } @@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) else continue; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); } } @@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) int rv; spin_lock(&gl->gl_spin); + trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, gl->gl_demote_state != state) { gl->gl_demote_state = LM_ST_UNLOCKED; } + trace_gfs2_demote_rq(gl); } /** @@ -936,6 +944,7 @@ fail: goto do_cancel; return; } + trace_gfs2_glock_queue(gh, 1); list_add_tail(&gh->gh_list, insert_pt); do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); @@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) return; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f2e449c595b..13c6237c5f6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -28,6 +28,7 @@ #include "meta_io.h" #include "util.h" #include "dir.h" +#include "trace_gfs2.h" #define PULL 1 @@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); } atomic_sub(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, -blks); gfs2_log_unlock(sdp); mutex_unlock(&sdp->sd_log_reserve_mutex); @@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) gfs2_log_lock(sdp); atomic_add(dist, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, dist); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) up_write(&sdp->sd_log_flush_lock); return; } + trace_gfs2_log_flush(sdp, 1); ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ai->ai_ail1_list); @@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); gfs2_log_unlock(sdp); log_write_header(sdp, 0, PULL); } @@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) ai = NULL; } gfs2_log_unlock(sdp); - + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); kfree(ai); @@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; atomic_add(unused, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, unused); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 00315f50fa4..9969ff062c5 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -27,6 +27,7 @@ #include "rgrp.h" #include "trans.h" #include "util.h" +#include "trace_gfs2.h" /** * gfs2_pin - Pin a buffer in memory @@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (bd->bd_ail) list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); get_bh(bh); + trace_gfs2_pin(bd, 1); } /** @@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + trace_gfs2_pin(bd, 0); gfs2_log_unlock(sdp); unlock_buffer(bh); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cc34f271b3e..7bc3c45cd67 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -33,6 +33,7 @@ #include "log.h" #include "quota.h" #include "dir.h" +#include "trace_gfs2.h" #define DO 0 #define UNDO 1 @@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) /* Map the extents for this journal's blocks */ map_journal_extents(sdp); } + trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); if (sdp->sd_lockstruct.ls_first) { unsigned int x; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index de3239731db..daa4ae341a2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,6 +29,7 @@ #include "util.h" #include "log.h" #include "inode.h" +#include "trace_gfs2.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) @@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); *bn = block; return 0; @@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone--; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); return block; } @@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode) rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); if (!rgd) return; + trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_trans_add_rg(rgd); @@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_no_addr); + trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_no_addr, 1); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c8930b31cdf..0a680133647 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -719,8 +719,6 @@ static void gfs2_put_super(struct super_block *sb) int error; struct gfs2_jdesc *jd; - lock_kernel(); - /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -787,8 +785,6 @@ restart: /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); - - unlock_kernel(); } /** diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h new file mode 100644 index 00000000000..98d6ef1c1dc --- /dev/null +++ b/fs/gfs2/trace_gfs2.h @@ -0,0 +1,407 @@ +#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GFS2_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gfs2 +#define TRACE_INCLUDE_FILE trace_gfs2 + +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/dlmconstants.h> +#include <linux/gfs2_ondisk.h> +#include "incore.h" +#include "glock.h" + +#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } +#define glock_trace_name(x) __print_symbolic(x, \ + dlm_state_name(IV), \ + dlm_state_name(NL), \ + dlm_state_name(CR), \ + dlm_state_name(CW), \ + dlm_state_name(PR), \ + dlm_state_name(PW), \ + dlm_state_name(EX)) + +#define block_state_name(x) __print_symbolic(x, \ + { GFS2_BLKST_FREE, "free" }, \ + { GFS2_BLKST_USED, "used" }, \ + { GFS2_BLKST_DINODE, "dinode" }, \ + { GFS2_BLKST_UNLINKED, "unlinked" }) + +#define show_glock_flags(flags) __print_flags(flags, "", \ + {(1UL << GLF_LOCK), "l" }, \ + {(1UL << GLF_DEMOTE), "D" }, \ + {(1UL << GLF_PENDING_DEMOTE), "d" }, \ + {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \ + {(1UL << GLF_DIRTY), "y" }, \ + {(1UL << GLF_LFLUSH), "f" }, \ + {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ + {(1UL << GLF_REPLY_PENDING), "r" }, \ + {(1UL << GLF_INITIAL), "I" }, \ + {(1UL << GLF_FROZEN), "F" }) + +#ifndef NUMPTY +#define NUMPTY +static inline u8 glock_trace_state(unsigned int state) +{ + switch(state) { + case LM_ST_SHARED: + return DLM_LOCK_PR; + case LM_ST_DEFERRED: + return DLM_LOCK_CW; + case LM_ST_EXCLUSIVE: + return DLM_LOCK_EX; + } + return DLM_LOCK_NL; +} +#endif + +/* Section 1 - Locking + * + * Objectives: + * Latency: Remote demote request to state change + * Latency: Local lock request to state change + * Latency: State change to lock grant + * Correctness: Ordering of local lock state vs. I/O requests + * Correctness: Responses to remote demote requests + */ + +/* General glock state change (DLM lock request completes) */ +TRACE_EVENT(gfs2_glock_state_change, + + TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state), + + TP_ARGS(gl, new_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, new_state ) + __field( u8, dmt_state ) + __field( u8, tgt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gl->gl_name.ln_number; + __entry->gltype = gl->gl_name.ln_type; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->new_state = glock_trace_state(new_state); + __entry->tgt_state = glock_trace_state(gl->gl_target); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->new_state), + glock_trace_name(__entry->tgt_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) +); + +/* State change -> unlocked, glock is being deallocated */ +TRACE_EVENT(gfs2_glock_put, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->gltype, (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(DLM_LOCK_IV), + show_glock_flags(__entry->flags)) + +); + +/* Callback (local or remote) requesting lock demotion */ +TRACE_EVENT(gfs2_demote_rq, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, dmt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) + +); + +/* Promotion/grant of a glock */ +TRACE_EVENT(gfs2_promote, + + TP_PROTO(const struct gfs2_holder *gh, int first), + + TP_ARGS(gh, first), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, first ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->first = first; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu promote %s %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->first ? "first": "other", + glock_trace_name(__entry->state)) +); + +/* Queue/dequeue a lock request */ +TRACE_EVENT(gfs2_glock_queue, + + TP_PROTO(const struct gfs2_holder *gh, int queue), + + TP_ARGS(gh, queue), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, queue ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->queue = queue; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu %squeue %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->queue ? "" : "de", + glock_trace_name(__entry->state)) +); + +/* Section 2 - Log/journal + * + * Objectives: + * Latency: Log flush time + * Correctness: pin/unpin vs. disk I/O ordering + * Performance: Log usage stats + */ + +/* Pin/unpin a block in the log */ +TRACE_EVENT(gfs2_pin, + + TP_PROTO(const struct gfs2_bufdata *bd, int pin), + + TP_ARGS(bd, pin), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, pin ) + __field( u32, len ) + __field( sector_t, block ) + __field( u64, ino ) + ), + + TP_fast_assign( + __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; + __entry->pin = pin; + __entry->len = bd->bd_bh->b_size; + __entry->block = bd->bd_bh->b_blocknr; + __entry->ino = bd->bd_gl->gl_name.ln_number; + ), + + TP_printk("%u,%u log %s %llu/%lu inode %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->pin ? "pin" : "unpin", + (unsigned long long)__entry->block, + (unsigned long)__entry->len, + (unsigned long long)__entry->ino) +); + +/* Flushing the log */ +TRACE_EVENT(gfs2_log_flush, + + TP_PROTO(const struct gfs2_sbd *sdp, int start), + + TP_ARGS(sdp, start), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, start ) + __field( u64, log_seq ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->start = start; + __entry->log_seq = sdp->sd_log_sequence; + ), + + TP_printk("%u,%u log flush %s %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->start ? "start" : "end", + (unsigned long long)__entry->log_seq) +); + +/* Reserving/releasing blocks in the log */ +TRACE_EVENT(gfs2_log_blocks, + + TP_PROTO(const struct gfs2_sbd *sdp, int blocks), + + TP_ARGS(sdp, blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, blocks ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->blocks = blocks; + ), + + TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->blocks) +); + +/* Section 3 - bmap + * + * Objectives: + * Latency: Bmap request time + * Performance: Block allocator tracing + * Correctness: Test of disard generation vs. blocks allocated + */ + +/* Map an extent of blocks, possibly a new allocation */ +TRACE_EVENT(gfs2_bmap, + + TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh, + sector_t lblock, int create, int errno), + + TP_ARGS(ip, bh, lblock, create, errno), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, lblock ) + __field( sector_t, pblock ) + __field( u64, inum ) + __field( unsigned long, state ) + __field( u32, len ) + __field( int, create ) + __field( int, errno ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->lblock = lblock; + __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; + __entry->inum = ip->i_no_addr; + __entry->state = bh->b_state; + __entry->len = bh->b_size; + __entry->create = create; + __entry->errno = errno; + ), + + TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->lblock, + (unsigned long)__entry->len, + (unsigned long long)__entry->pblock, + __entry->state, __entry->create ? "create " : "nocreate", + __entry->errno) +); + +/* Keep track of blocks as they are allocated/freed */ +TRACE_EVENT(gfs2_block_alloc, + + TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, + u8 block_state), + + TP_ARGS(ip, block, len, block_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, start ) + __field( u64, inum ) + __field( u32, len ) + __field( u8, block_state ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->start = block; + __entry->inum = ip->i_no_addr; + __entry->len = len; + __entry->block_state = block_state; + ), + + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->start, + (unsigned long)__entry->len, + block_state_name(__entry->block_state)) +); + +#endif /* _TRACE_GFS2_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> + diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0af36085eb2..1a9c7878f86 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -556,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) /* add partitions */ for (p = 1; p < state->limit; p++) { - sector_t size = state->parts[p].size; - sector_t from = state->parts[p].from; + sector_t size, from; +try_scan: + size = state->parts[p].size; if (!size) continue; + + from = state->parts[p].from; if (from >= get_capacity(disk)) { printk(KERN_WARNING "%s: p%d ignored, start %llu is behind the end of the disk\n", disk->disk_name, p, (unsigned long long) from); continue; } + if (from + size > get_capacity(disk)) { - /* - * we can not ignore partitions of broken tables - * created by for example camera firmware, but we - * limit them to the end of the disk to avoid - * creating invalid block devices - */ + struct block_device_operations *bdops = disk->fops; + unsigned long long capacity; + printk(KERN_WARNING - "%s: p%d size %llu limited to end of disk\n", + "%s: p%d size %llu exceeds device capacity, ", disk->disk_name, p, (unsigned long long) size); - size = get_capacity(disk) - from; + + if (bdops->set_capacity && + (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { + printk(KERN_CONT "enabling native capacity\n"); + capacity = bdops->set_capacity(disk, ~0ULL); + disk->flags |= GENHD_FL_NATIVE_CAPACITY; + if (capacity > get_capacity(disk)) { + set_capacity(disk, capacity); + check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; + } + goto try_scan; + } else { + /* + * we can not ignore partitions of broken tables + * created by for example camera firmware, but + * we limit them to the end of the disk to avoid + * creating invalid block devices + */ + printk(KERN_CONT "limited to end of disk\n"); + size = get_capacity(disk) - from; + } } part = add_partition(disk, p, from, size, state->parts[p].flags); |