From d911d98748018f7c8facc035ba39c30f5cce6f9c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2014 11:07:31 -0400 Subject: kernfs: make kernfs_notify() trigger inotify events too kernfs_notify() is used to indicate either new data is available or the content of a file has changed. It currently only triggers poll which may not be the most convenient to monitor especially when there are a lot to monitor. Let's hook it up to fsnotify too so that the events can be monitored via inotify too. fsnotify_modify() requires file * but kernfs_notify() doesn't have any specific file associated; however, we can walk all super_blocks associated with a kernfs_root and as kernfs always associate one ino with inode and one dentry with an inode, it's trivial to look up the dentry associated with a given kernfs_node. As any active monitor would pin dentry, just looking up existing dentry is enough. This patch looks up the dentry associated with the specified kernfs_node and generates events equivalent to fsnotify_modify(). Note that as fsnotify doesn't provide fsnotify_modify() equivalent which can be called with dentry, kernfs_notify() directly calls fsnotify_parent() and fsnotify(). It might be better to add a wrapper in fsnotify.h instead. Signed-off-by: Tejun Heo Cc: John McCutchan Cc: Robert Love Cc: Eric Paris Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'fs/kernfs/file.c') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8034706a7af..98bacd9ea7f 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -785,20 +786,48 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) */ void kernfs_notify(struct kernfs_node *kn) { + struct kernfs_root *root = kernfs_root(kn); struct kernfs_open_node *on; + struct kernfs_super_info *info; unsigned long flags; + if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) + return; + + /* kick poll */ spin_lock_irqsave(&kernfs_open_node_lock, flags); - if (!WARN_ON(kernfs_type(kn) != KERNFS_FILE)) { - on = kn->attr.open; - if (on) { - atomic_inc(&on->event); - wake_up_interruptible(&on->poll); - } + on = kn->attr.open; + if (on) { + atomic_inc(&on->event); + wake_up_interruptible(&on->poll); } spin_unlock_irqrestore(&kernfs_open_node_lock, flags); + + /* kick fsnotify */ + mutex_lock(&kernfs_mutex); + + list_for_each_entry(info, &root->supers, node) { + struct inode *inode; + struct dentry *dentry; + + inode = ilookup(info->sb, kn->ino); + if (!inode) + continue; + + dentry = d_find_any_alias(inode); + if (dentry) { + fsnotify_parent(NULL, dentry, FS_MODIFY); + fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, + NULL, 0); + dput(dentry); + } + + iput(inode); + } + + mutex_unlock(&kernfs_mutex); } EXPORT_SYMBOL_GPL(kernfs_notify); -- cgit v1.2.3-70-g09d2 From 555724a831b4a146e7bdf16ecc989cda032b076d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 May 2014 13:56:27 -0400 Subject: kernfs, sysfs, cgroup: restrict extra perm check on open to sysfs The kernfs open method - kernfs_fop_open() - inherited extra permission checks from sysfs. While the vfs layer allows ignoring the read/write permissions checks if the issuer has CAP_DAC_OVERRIDE, sysfs explicitly denied open regardless of the cap if the file doesn't have any of the UGO perms of the requested access or doesn't implement the requested operation. It can be debated whether this was a good idea or not but the behavior is too subtle and dangerous to change at this point. After cgroup got converted to kernfs, this extra perm check also got applied to cgroup breaking libcgroup which opens write-only files with O_RDWR as root. This patch gates the extra open permission check with a new flag KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK and enables it for sysfs. For sysfs, nothing changes. For cgroup, root now can perform any operation regardless of the permissions as it was before kernfs conversion. Note that kernfs still fails unimplemented operations with -EINVAL. While at it, add comments explaining KERNFS_ROOT flags. Signed-off-by: Tejun Heo Reported-by: Andrey Wagin Tested-by: Andrey Wagin Cc: Li Zefan References: http://lkml.kernel.org/g/CANaxB-xUm3rJ-Cbp72q-rQJO5mZe1qK6qXsQM=vh0U8upJ44+A@mail.gmail.com Fixes: 2bd59d48ebfb ("cgroup: convert to kernfs") Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 17 ++++++++++------- fs/sysfs/mount.c | 3 ++- include/linux/kernfs.h | 19 ++++++++++++++++++- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'fs/kernfs/file.c') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e01ea4a14a0..5e9a80cfc3d 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -610,6 +610,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn, static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = file->f_path.dentry->d_fsdata; + struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; @@ -624,14 +625,16 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) has_write = ops->write || ops->mmap; has_mmap = ops->mmap; - /* check perms and supported operations */ - if ((file->f_mode & FMODE_WRITE) && - (!(inode->i_mode & S_IWUGO) || !has_write)) - goto err_out; + /* see the flag definition for details */ + if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; - if ((file->f_mode & FMODE_READ) && - (!(inode->i_mode & S_IRUGO) || !has_read)) - goto err_out; + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index a66ad6196f5..8794423f7ef 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -63,7 +63,8 @@ int __init sysfs_init(void) { int err; - sysfs_root = kernfs_create_root(NULL, 0, NULL); + sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, + NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b0122dc6f96..ca1be5c9136 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -50,7 +50,24 @@ enum kernfs_node_flag { /* @flags for kernfs_create_root() */ enum kernfs_root_flag { - KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, + /* + * kernfs_nodes are created in the deactivated state and invisible. + * They require explicit kernfs_activate() to become visible. This + * can be used to make related nodes become visible atomically + * after all nodes are created successfully. + */ + KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, + + /* + * For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2) + * succeeds regardless of the RW permissions. sysfs had an extra + * layer of enforcement where open(2) fails with -EACCES regardless + * of CAP_DAC_OVERRIDE if the permission doesn't have the + * respective read or write access at all (none of S_IRUGO or + * S_IWUGO) or the respective operation isn't implemented. The + * following flag enables that behavior. + */ + KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, }; /* type-specific structures for kernfs_node union members */ -- cgit v1.2.3-70-g09d2 From ecca47ce8294843045e7465d76fee84dbf07a004 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 16:41:03 -0400 Subject: kernfs: kernfs_notify() must be useable from non-sleepable contexts d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events too") added fsnotify triggering to kernfs_notify() which requires a sleepable context. There are already existing users of kernfs_notify() which invoke it from an atomic context and in general it's silly to require a sleepable context for triggering a notification. The following is an invalid context bug triggerd by md invoking sysfs_notify() from IO completion path. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 2 locks held by swapper/1/0: #0: (&(&vblk->vq_lock)->rlock){-.-...}, at: [] virtblk_done+0x42/0xe0 [virtio_blk] #1: (&(&bitmap->counts.lock)->rlock){-.....}, at: [] bitmap_endwrite+0x68/0x240 irq event stamp: 33518 hardirqs last enabled at (33515): [] default_idle+0x1f/0x230 hardirqs last disabled at (33516): [] common_interrupt+0x6d/0x72 softirqs last enabled at (33518): [] _local_bh_enable+0x22/0x50 softirqs last disabled at (33517): [] irq_enter+0x60/0x80 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c 0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000 0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2 Call Trace: [] dump_stack+0x4d/0x66 [] __might_sleep+0x184/0x240 [] mutex_lock_nested+0x42/0x440 [] kernfs_notify+0x90/0x150 [] bitmap_endwrite+0xcc/0x240 [] close_write+0x93/0xb0 [raid1] [] r1_bio_write_done+0x29/0x50 [raid1] [] raid1_end_write_request+0xe4/0x260 [raid1] [] bio_endio+0x6b/0xa0 [] blk_update_request+0x94/0x420 [] blk_mq_end_io+0x1a/0x70 [] virtblk_request_done+0x32/0x80 [virtio_blk] [] __blk_mq_complete_request+0x88/0x120 [] blk_mq_complete_request+0x2a/0x30 [] virtblk_done+0x66/0xe0 [virtio_blk] [] vring_interrupt+0x3a/0xa0 [virtio_ring] [] handle_irq_event_percpu+0x77/0x340 [] handle_irq_event+0x3d/0x60 [] handle_edge_irq+0x66/0x130 [] handle_irq+0x84/0x150 [] do_IRQ+0x4d/0xe0 [] common_interrupt+0x72/0x72 [] ? native_safe_halt+0x6/0x10 [] default_idle+0x24/0x230 [] arch_cpu_idle+0xf/0x20 [] cpu_startup_entry+0x37c/0x7b0 [] start_secondary+0x25b/0x300 This patch fixes it by punting the notification delivery through a work item. This ends up adding an extra pointer to kernfs_elem_attr enlarging kernfs_node by a pointer, which is not ideal but not a very big deal either. If this turns out to be an actual issue, we can move kernfs_elem_attr->size to kernfs_node->iattr later. Signed-off-by: Tejun Heo Reported-by: Josh Boyer Cc: Jens Axboe Reviewed-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 69 ++++++++++++++++++++++++++++++++++++++++---------- include/linux/kernfs.h | 1 + 2 files changed, 56 insertions(+), 14 deletions(-) (limited to 'fs/kernfs/file.c') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e3d37f607f9..d895b4b7b66 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -39,6 +39,19 @@ struct kernfs_open_node { struct list_head files; /* goes through kernfs_open_file.list */ }; +/* + * kernfs_notify() may be called from any context and bounces notifications + * through a work item. To minimize space overhead in kernfs_node, the + * pending queue is implemented as a singly linked list of kernfs_nodes. + * The list is terminated with the self pointer so that whether a + * kernfs_node is on the list or not can be determined by testing the next + * pointer for NULL. + */ +#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) + +static DEFINE_SPINLOCK(kernfs_notify_lock); +static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; + static struct kernfs_open_file *kernfs_of(struct file *file) { return ((struct seq_file *)file->private_data)->private; @@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) return DEFAULT_POLLMASK|POLLERR|POLLPRI; } -/** - * kernfs_notify - notify a kernfs file - * @kn: file to notify - * - * Notify @kn such that poll(2) on @kn wakes up. - */ -void kernfs_notify(struct kernfs_node *kn) +static void kernfs_notify_workfn(struct work_struct *work) { - struct kernfs_root *root = kernfs_root(kn); + struct kernfs_node *kn; struct kernfs_open_node *on; struct kernfs_super_info *info; - unsigned long flags; - - if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) +repeat: + /* pop one off the notify_list */ + spin_lock_irq(&kernfs_notify_lock); + kn = kernfs_notify_list; + if (kn == KERNFS_NOTIFY_EOL) { + spin_unlock_irq(&kernfs_notify_lock); return; + } + kernfs_notify_list = kn->attr.notify_next; + kn->attr.notify_next = NULL; + spin_unlock_irq(&kernfs_notify_lock); /* kick poll */ - spin_lock_irqsave(&kernfs_open_node_lock, flags); + spin_lock_irq(&kernfs_open_node_lock); on = kn->attr.open; if (on) { @@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn) wake_up_interruptible(&on->poll); } - spin_unlock_irqrestore(&kernfs_open_node_lock, flags); + spin_unlock_irq(&kernfs_open_node_lock); /* kick fsnotify */ mutex_lock(&kernfs_mutex); - list_for_each_entry(info, &root->supers, node) { + list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct inode *inode; struct dentry *dentry; @@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn) } mutex_unlock(&kernfs_mutex); + kernfs_put(kn); + goto repeat; +} + +/** + * kernfs_notify - notify a kernfs file + * @kn: file to notify + * + * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any + * context. + */ +void kernfs_notify(struct kernfs_node *kn) +{ + static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); + unsigned long flags; + + if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) + return; + + spin_lock_irqsave(&kernfs_notify_lock, flags); + if (!kn->attr.notify_next) { + kernfs_get(kn); + kn->attr.notify_next = kernfs_notify_list; + kernfs_notify_list = kn; + schedule_work(&kernfs_notify_work); + } + spin_unlock_irqrestore(&kernfs_notify_lock, flags); } EXPORT_SYMBOL_GPL(kernfs_notify); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 17aa1cce6f8..145375ea0bd 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -91,6 +91,7 @@ struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node *open; loff_t size; + struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* -- cgit v1.2.3-70-g09d2