diff options
Diffstat (limited to 'fs')
42 files changed, 1122 insertions, 1638 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 1cdc043922d..6c5051802bd 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1490,7 +1490,12 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS - select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL + select NFSD_V2_ACL if NFSD_V3_ACL + select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFSD_TCP if NFSD_V4 + select CRYPTO_MD5 if NFSD_V4 + select CRYPTO if NFSD_V4 + select FS_POSIX_ACL if NFSD_V4 help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1528,7 +1533,6 @@ config NFSD_V3 config NFSD_V3_ACL bool "Provide server support for the NFSv3 ACL protocol extension" depends on NFSD_V3 - select NFSD_V2_ACL help Implement the NFSv3 ACL protocol extension for manipulating POSIX Access Control Lists on exported file systems. NFS clients should @@ -1538,10 +1542,6 @@ config NFSD_V3_ACL config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL - select NFSD_TCP - select CRYPTO_MD5 - select CRYPTO - select FS_POSIX_ACL help If you would like to include the NFSv4 server as well as the NFSv2 and NFSv3 servers, say Y here. This feature is experimental, and diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 009a9ae88d6..bfc1fd22d5b 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -413,8 +413,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer, /* we found it in the graveyard - resurrect it */ found_dead_server: - list_del(&server->link); - list_add_tail(&server->link, &cell->sv_list); + list_move_tail(&server->link, &cell->sv_list); afs_get_server(server); afs_kafstimod_del_timer(&server->timeout); spin_unlock(&cell->sv_gylock); diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index 7ac07d0d47b..f09a794f248 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -136,8 +136,7 @@ static int kafsasyncd(void *arg) if (!list_empty(&kafsasyncd_async_attnq)) { op = list_entry(kafsasyncd_async_attnq.next, struct afs_async_op, link); - list_del(&op->link); - list_add_tail(&op->link, + list_move_tail(&op->link, &kafsasyncd_async_busyq); } @@ -204,8 +203,7 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op) init_waitqueue_entry(&op->waiter, kafsasyncd_task); add_wait_queue(&op->call->waitq, &op->waiter); - list_del(&op->link); - list_add_tail(&op->link, &kafsasyncd_async_busyq); + list_move_tail(&op->link, &kafsasyncd_async_busyq); spin_unlock(&kafsasyncd_async_lock); @@ -223,8 +221,7 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op) spin_lock(&kafsasyncd_async_lock); - list_del(&op->link); - list_add_tail(&op->link, &kafsasyncd_async_attnq); + list_move_tail(&op->link, &kafsasyncd_async_attnq); spin_unlock(&kafsasyncd_async_lock); diff --git a/fs/afs/server.c b/fs/afs/server.c index 62b093aa41c..22afaae1a4c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -123,8 +123,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr, resurrect_server: _debug("resurrecting server"); - list_del(&zombie->link); - list_add_tail(&zombie->link, &cell->sv_list); + list_move_tail(&zombie->link, &cell->sv_list); afs_get_server(zombie); afs_kafstimod_del_timer(&zombie->timeout); spin_unlock(&cell->sv_gylock); @@ -168,8 +167,7 @@ void afs_put_server(struct afs_server *server) } spin_lock(&cell->sv_gylock); - list_del(&server->link); - list_add_tail(&server->link, &cell->sv_graveyard); + list_move_tail(&server->link, &cell->sv_graveyard); /* time out in 10 secs */ afs_kafstimod_add_timer(&server->timeout, 10 * HZ); diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index eced20618ec..331f730a1fb 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -326,8 +326,7 @@ int afs_vlocation_lookup(struct afs_cell *cell, /* found in the graveyard - resurrect */ _debug("found in graveyard"); atomic_inc(&vlocation->usage); - list_del(&vlocation->link); - list_add_tail(&vlocation->link, &cell->vl_list); + list_move_tail(&vlocation->link, &cell->vl_list); spin_unlock(&cell->vl_gylock); afs_kafstimod_del_timer(&vlocation->timeout); @@ -478,8 +477,7 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation) } /* move to graveyard queue */ - list_del(&vlocation->link); - list_add_tail(&vlocation->link,&cell->vl_graveyard); + list_move_tail(&vlocation->link,&cell->vl_graveyard); /* remove from pending timeout queue (refcounted if actually being * updated) */ diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 9867fef3261..cf62da5d782 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -104,8 +104,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode, vnode->cb_expiry * HZ); spin_lock(&afs_cb_hash_lock); - list_del(&vnode->cb_hash_link); - list_add_tail(&vnode->cb_hash_link, + list_move_tail(&vnode->cb_hash_link, &afs_cb_hash(server, &vnode->fid)); spin_unlock(&afs_cb_hash_lock); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 4456d1daa40..8dbd44f10e9 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -376,8 +376,7 @@ next: DPRINTK("returning %p %.*s", expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); - list_del(&expired->d_parent->d_subdirs); - list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 6c6771db36d..7caee8d8ea3 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -259,7 +259,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, /* If request was not a signal, enqueue and don't free */ if (!(req->uc_flags & REQ_ASYNC)) { req->uc_flags |= REQ_READ; - list_add(&(req->uc_chain), vcp->vc_processing.prev); + list_add_tail(&(req->uc_chain), &vcp->vc_processing); goto out; } diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index b040eba13a7..a5b5e631ba6 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -725,7 +725,7 @@ static int coda_upcall(struct coda_sb_info *sbi, ((union inputArgs *)buffer)->ih.unique = req->uc_unique; /* Append msg to pending queue and poke Venus. */ - list_add(&(req->uc_chain), vcommp->vc_pending.prev); + list_add_tail(&(req->uc_chain), &vcommp->vc_pending); wake_up_interruptible(&vcommp->vc_waitq); /* We can be interrupted while we wait for Venus to process diff --git a/fs/compat.c b/fs/compat.c index 7e7e5bc4f3c..e31e9cf9664 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -55,6 +55,20 @@ extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +int compat_log = 1; + +int compat_printk(const char *fmt, ...) +{ + va_list ap; + int ret; + if (!compat_log) + return 0; + va_start(ap, fmt); + ret = vprintk(fmt, ap); + va_end(ap); + return ret; +} + /* * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. @@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd, sprintf(buf,"'%c'", (cmd>>24) & 0x3f); if (!isprint(buf[1])) sprintf(buf, "%02x", buf[1]); - printk("ioctl32(%s:%d): Unknown cmd fd(%d) " + compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) " "cmd(%08x){%s} arg(%08x) on %s\n", current->comm, current->pid, (int)fd, (unsigned int)cmd, buf, diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9eb9824dd33..d8ecfedef18 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -80,6 +80,7 @@ #include <net/bluetooth/rfcomm.h> #include <linux/capi.h> +#include <linux/gigaset_dev.h> #include <scsi/scsi.h> #include <scsi/scsi_ioctl.h> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5f952187fc5..207f8006fd6 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1009,8 +1009,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir /* fallthrough */ default: if (filp->f_pos == 2) { - list_del(q); - list_add(q, &parent_sd->s_children); + list_move(q, &parent_sd->s_children); } for (p=q->next; p!= &parent_sd->s_children; p=p->next) { struct configfs_dirent *next; @@ -1033,8 +1032,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir dt_type(next)) < 0) return 0; - list_del(q); - list_add(q, p); + list_move(q, p); p = q; filp->f_pos++; } diff --git a/fs/dcache.c b/fs/dcache.c index b85fda36053..48b44a714b3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -522,8 +522,7 @@ void shrink_dcache_sb(struct super_block * sb) dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; - list_del(tmp); - list_add(tmp, &dentry_unused); + list_move(tmp, &dentry_unused); } /* @@ -638,7 +637,7 @@ resume: * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { - list_add(&dentry->d_lru, dentry_unused.prev); + list_add_tail(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; found++; } diff --git a/fs/dquot.c b/fs/dquot.c index 81d87a413c6..0122a279106 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -250,7 +250,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block /* Add a dquot to the tail of the free list */ static inline void put_dquot_last(struct dquot *dquot) { - list_add(&dquot->dq_free, free_dquots.prev); + list_add_tail(&dquot->dq_free, &free_dquots); dqstats.free_dquots++; } @@ -266,7 +266,7 @@ static inline void put_inuse(struct dquot *dquot) { /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ - list_add(&dquot->dq_inuse, inuse_list.prev); + list_add_tail(&dquot->dq_inuse, &inuse_list); dqstats.allocated_dquots++; } diff --git a/fs/exec.c b/fs/exec.c index 0b88bf64614..c8494f513ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk) * and to assume its PID: */ if (!thread_group_leader(current)) { - struct dentry *proc_dentry1, *proc_dentry2; - /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most @@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk) */ current->start_time = leader->start_time; - spin_lock(&leader->proc_lock); - spin_lock(¤t->proc_lock); - proc_dentry1 = proc_pid_unhash(current); - proc_dentry2 = proc_pid_unhash(leader); write_lock_irq(&tasklist_lock); BUG_ON(leader->tgid != current->tgid); @@ -713,7 +707,7 @@ static int de_thread(struct task_struct *tsk) attach_pid(current, PIDTYPE_PID, current->pid); attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); attach_pid(current, PIDTYPE_SID, current->signal->session); - list_add_tail_rcu(¤t->tasks, &init_task.tasks); + list_replace_rcu(&leader->tasks, ¤t->tasks); current->group_leader = current; leader->group_leader = current; @@ -721,7 +715,6 @@ static int de_thread(struct task_struct *tsk) /* Reduce leader to a thread */ detach_pid(leader, PIDTYPE_PGID); detach_pid(leader, PIDTYPE_SID); - list_del_init(&leader->tasks); current->exit_signal = SIGCHLD; @@ -729,10 +722,6 @@ static int de_thread(struct task_struct *tsk) leader->exit_state = EXIT_DEAD; write_unlock_irq(&tasklist_lock); - spin_unlock(&leader->proc_lock); - spin_unlock(¤t->proc_lock); - proc_pid_flush(proc_dentry1); - proc_pid_flush(proc_dentry2); } /* @@ -1379,67 +1368,102 @@ static void format_corename(char *corename, const char *pattern, long signr) *out_ptr = 0; } -static void zap_threads (struct mm_struct *mm) +static void zap_process(struct task_struct *start) { - struct task_struct *g, *p; - struct task_struct *tsk = current; - struct completion *vfork_done = tsk->vfork_done; - int traced = 0; + struct task_struct *t; - /* - * Make sure nobody is waiting for us to release the VM, - * otherwise we can deadlock when we wait on each other - */ - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + start->signal->flags = SIGNAL_GROUP_EXIT; + start->signal->group_stop_count = 0; - read_lock(&tasklist_lock); - do_each_thread(g,p) - if (mm == p->mm && p != tsk) { - force_sig_specific(SIGKILL, p); - mm->core_waiters++; - if (unlikely(p->ptrace) && - unlikely(p->parent->mm == mm)) - traced = 1; + t = start; + do { + if (t != current && t->mm) { + t->mm->core_waiters++; + sigaddset(&t->pending.signal, SIGKILL); + signal_wake_up(t, 1); } - while_each_thread(g,p); + } while ((t = next_thread(t)) != start); +} - read_unlock(&tasklist_lock); +static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + int exit_code) +{ + struct task_struct *g, *p; + unsigned long flags; + int err = -EAGAIN; + + spin_lock_irq(&tsk->sighand->siglock); + if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) { + tsk->signal->group_exit_code = exit_code; + zap_process(tsk); + err = 0; + } + spin_unlock_irq(&tsk->sighand->siglock); + if (err) + return err; - if (unlikely(traced)) { - /* - * We are zapping a thread and the thread it ptraces. - * If the tracee went into a ptrace stop for exit tracing, - * we could deadlock since the tracer is waiting for this - * coredump to finish. Detach them so they can both die. - */ - write_lock_irq(&tasklist_lock); - do_each_thread(g,p) { - if (mm == p->mm && p != tsk && - p->ptrace && p->parent->mm == mm) { - __ptrace_detach(p, 0); + if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) + goto done; + + rcu_read_lock(); + for_each_process(g) { + if (g == tsk->group_leader) + continue; + + p = g; + do { + if (p->mm) { + if (p->mm == mm) { + /* + * p->sighand can't disappear, but + * may be changed by de_thread() + */ + lock_task_sighand(p, &flags); + zap_process(p); + unlock_task_sighand(p, &flags); + } + break; } - } while_each_thread(g,p); - write_unlock_irq(&tasklist_lock); + } while ((p = next_thread(p)) != g); } + rcu_read_unlock(); +done: + return mm->core_waiters; } -static void coredump_wait(struct mm_struct *mm) +static int coredump_wait(int exit_code) { - DECLARE_COMPLETION(startup_done); + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + struct completion startup_done; + struct completion *vfork_done; int core_waiters; + init_completion(&mm->core_done); + init_completion(&startup_done); mm->core_startup_done = &startup_done; - zap_threads(mm); - core_waiters = mm->core_waiters; + core_waiters = zap_threads(tsk, mm, exit_code); up_write(&mm->mmap_sem); + if (unlikely(core_waiters < 0)) + goto fail; + + /* + * Make sure nobody is waiting for us to release the VM, + * otherwise we can deadlock when we wait on each other + */ + vfork_done = tsk->vfork_done; + if (vfork_done) { + tsk->vfork_done = NULL; + complete(vfork_done); + } + if (core_waiters) wait_for_completion(&startup_done); +fail: BUG_ON(mm->core_waiters); + return core_waiters; } int do_coredump(long signr, int exit_code, struct pt_regs * regs) @@ -1473,22 +1497,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) } mm->dumpable = 0; - retval = -EAGAIN; - spin_lock_irq(¤t->sighand->siglock); - if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { - current->signal->flags = SIGNAL_GROUP_EXIT; - current->signal->group_exit_code = exit_code; - current->signal->group_stop_count = 0; - retval = 0; - } - spin_unlock_irq(¤t->sighand->siglock); - if (retval) { - up_write(&mm->mmap_sem); + retval = coredump_wait(exit_code); + if (retval < 0) goto fail; - } - - init_completion(&mm->core_done); - coredump_wait(mm); /* * Clear any false indication of pending signals that might diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b2891cc29db..b7483360a2d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -630,7 +630,7 @@ enum { Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, @@ -666,6 +666,7 @@ static match_table_t tokens = { {Opt_noreservation, "noreservation"}, {Opt_noload, "noload"}, {Opt_nobh, "nobh"}, + {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, @@ -1014,6 +1015,9 @@ clear_qf_name: case Opt_nobh: set_opt(sbi->s_mount_opt, NOBH); break; + case Opt_bh: + clear_opt(sbi->s_mount_opt, NOBH); + break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 1862e8bc101..b8886f048ea 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -53,8 +53,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, if (!instr) { printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); spin_lock(&c->erase_completion_lock); - list_del(&jeb->list); - list_add(&jeb->list, &c->erase_pending_list); + list_move(&jeb->list, &c->erase_pending_list); c->erasing_size -= c->sector_size; c->dirty_size += c->sector_size; jeb->dirty_size = c->sector_size; @@ -86,8 +85,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, /* Erase failed immediately. Refile it on the list */ D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); spin_lock(&c->erase_completion_lock); - list_del(&jeb->list); - list_add(&jeb->list, &c->erase_pending_list); + list_move(&jeb->list, &c->erase_pending_list); c->erasing_size -= c->sector_size; c->dirty_size += c->sector_size; jeb->dirty_size = c->sector_size; @@ -161,8 +159,7 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo { D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); spin_lock(&c->erase_completion_lock); - list_del(&jeb->list); - list_add_tail(&jeb->list, &c->erase_complete_list); + list_move_tail(&jeb->list, &c->erase_complete_list); spin_unlock(&c->erase_completion_lock); /* Ensure that kupdated calls us again to mark them clean */ jffs2_erase_pending_trigger(c); @@ -178,8 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { /* We'd like to give this block another try. */ spin_lock(&c->erase_completion_lock); - list_del(&jeb->list); - list_add(&jeb->list, &c->erase_pending_list); + list_move(&jeb->list, &c->erase_pending_list); c->erasing_size -= c->sector_size; c->dirty_size += c->sector_size; jeb->dirty_size = c->sector_size; @@ -191,8 +187,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock spin_lock(&c->erase_completion_lock); c->erasing_size -= c->sector_size; c->bad_size += c->sector_size; - list_del(&jeb->list); - list_add(&jeb->list, &c->bad_list); + list_move(&jeb->list, &c->bad_list); c->nr_erasing_blocks--; spin_unlock(&c->erase_completion_lock); wake_up(&c->erase_wait); diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 8bedfd2ff68..ac0c350ed7d 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -211,8 +211,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) struct jffs2_eraseblock *ejeb; ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); - list_del(&ejeb->list); - list_add_tail(&ejeb->list, &c->erase_pending_list); + list_move_tail(&ejeb->list, &c->erase_pending_list); c->nr_erasing_blocks++; jffs2_erase_pending_trigger(c); D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a7f153f79ec..b9b700730df 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -495,8 +495,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) /* Fix up the original jeb now it's on the bad_list */ if (first_raw == jeb->first_node) { D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); - list_del(&jeb->list); - list_add(&jeb->list, &c->erase_pending_list); + list_move(&jeb->list, &c->erase_pending_list); c->nr_erasing_blocks++; jffs2_erase_pending_trigger(c); } diff --git a/fs/libfs.c b/fs/libfs.c index fc785d8befb..ac02ea602c3 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -149,10 +149,9 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) /* fallthrough */ default: spin_lock(&dcache_lock); - if (filp->f_pos == 2) { - list_del(q); - list_add(q, &dentry->d_subdirs); - } + if (filp->f_pos == 2) + list_move(q, &dentry->d_subdirs); + for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); @@ -164,8 +163,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; spin_lock(&dcache_lock); /* next is still alive */ - list_del(q); - list_add(q, p); + list_move(q, p); p = q; filp->f_pos++; } diff --git a/fs/namespace.c b/fs/namespace.c index 866430bb024..b3ed212ea41 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -526,10 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { struct vfsmount *p; - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_hash); - list_add(&p->mnt_hash, kill); - } + for (p = mnt; p; p = next_mnt(p, mnt)) + list_move(&p->mnt_hash, kill); if (propagate) propagate_umount(kill); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 96c7578cbe1..1630b5670dc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -529,8 +529,7 @@ move_to_confirmed(struct nfs4_client *clp) dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_del_init(&clp->cl_strhash); - list_del_init(&clp->cl_idhash); - list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); + list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); strhashval = clientstr_hashval(clp->cl_recdir); list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); renew_client(clp); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index d852ebb538e..fdf7cf3dfad 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -103,8 +103,7 @@ nfsd_cache_shutdown(void) static void lru_put_end(struct svc_cacherep *rp) { - list_del(&rp->c_lru); - list_add_tail(&rp->c_lru, &lru_head); + list_move_tail(&rp->c_lru, &lru_head); } /* diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 355593dd8ef..87ee29cad50 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -381,8 +381,7 @@ do_ast: ret = DLM_NORMAL; if (past->type == DLM_AST) { /* do not alter lock refcount. switching lists. */ - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->granted); + list_move_tail(&lock->list, &res->granted); mlog(0, "ast: adding to granted list... type=%d, " "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); if (lock->ml.convert_type != LKM_IVMODE) { diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 8285228d9e3..70888b31e75 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -231,8 +231,7 @@ switch_queues: lock->ml.convert_type = type; /* do not alter lock refcount. switching lists. */ - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->converting); + list_move_tail(&lock->list, &res->converting); unlock_exit: spin_unlock(&lock->spinlock); @@ -248,8 +247,7 @@ void dlm_revert_pending_convert(struct dlm_lock_resource *res, struct dlm_lock *lock) { /* do not alter lock refcount. switching lists. */ - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->granted); + list_move_tail(&lock->list, &res->granted); lock->ml.convert_type = LKM_IVMODE; lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); } @@ -294,8 +292,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, res->state |= DLM_LOCK_RES_IN_PROGRESS; /* move lock to local convert queue */ /* do not alter lock refcount. switching lists. */ - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->converting); + list_move_tail(&lock->list, &res->converting); lock->convert_pending = 1; lock->ml.convert_type = type; diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 6fea28318d6..55cda25ae11 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -239,8 +239,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, mlog(0, "%s: $RECOVERY lock for this node (%u) is " "mastered by %u; got lock, manually granting (no ast)\n", dlm->name, dlm->node_num, res->owner); - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->granted); + list_move_tail(&lock->list, &res->granted); } spin_unlock(&res->spinlock); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 805cbabac05..9962190e741 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -905,13 +905,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, mlog(0, "found lockres owned by dead node while " "doing recovery for node %u. sending it.\n", dead_node); - list_del_init(&res->recovering); - list_add_tail(&res->recovering, list); + list_move_tail(&res->recovering, list); } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { mlog(0, "found UNKNOWN owner while doing recovery " "for node %u. sending it.\n", dead_node); - list_del_init(&res->recovering); - list_add_tail(&res->recovering, list); + list_move_tail(&res->recovering, list); } } spin_unlock(&dlm->spinlock); @@ -1529,8 +1527,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* move the lock to its proper place */ /* do not alter lock refcount. switching lists. */ - list_del_init(&lock->list); - list_add_tail(&lock->list, queue); + list_move_tail(&lock->list, queue); spin_unlock(&res->spinlock); mlog(0, "just reordered a local lock!\n"); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 5be9d14f12c..44d3b57ae8a 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -318,8 +318,7 @@ converting: target->ml.type = target->ml.convert_type; target->ml.convert_type = LKM_IVMODE; - list_del_init(&target->list); - list_add_tail(&target->list, &res->granted); + list_move_tail(&target->list, &res->granted); BUG_ON(!target->lksb); target->lksb->status = DLM_NORMAL; @@ -380,8 +379,7 @@ blocked: target->ml.type, target->ml.node); // target->ml.type is already correct - list_del_init(&target->list); - list_add_tail(&target->list, &res->granted); + list_move_tail(&target->list, &res->granted); BUG_ON(!target->lksb); target->lksb->status = DLM_NORMAL; diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 7b1a2754267..ac89c509daf 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -271,8 +271,7 @@ void dlm_commit_pending_unlock(struct dlm_lock_resource *res, void dlm_commit_pending_cancel(struct dlm_lock_resource *res, struct dlm_lock *lock) { - list_del_init(&lock->list); - list_add_tail(&lock->list, &res->granted); + list_move_tail(&lock->list, &res->granted); lock->ml.convert_type = LKM_IVMODE; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index eebc3cfa6be..3fe8781c22c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -222,8 +222,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle, BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list)); OCFS2_I(inode)->ip_handle = handle; - list_del(&(OCFS2_I(inode)->ip_handle_list)); - list_add_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list)); + list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list)); } static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index efc7c91128a..93a56bd4a2b 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1,5 +1,4 @@ -/* $Id: inode.c,v 1.15 2001/11/12 09:43:39 davem Exp $ - * openpromfs.c: /proc/openprom handling routines +/* inode.c: /proc/openprom handling routines * * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) @@ -12,762 +11,245 @@ #include <linux/openprom_fs.h> #include <linux/init.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/seq_file.h> #include <asm/openprom.h> #include <asm/oplib.h> +#include <asm/prom.h> #include <asm/uaccess.h> -#define ALIASES_NNODES 64 - -typedef struct { - u16 parent; - u16 next; - u16 child; - u16 first_prop; - u32 node; -} openpromfs_node; - -typedef struct { -#define OPP_STRING 0x10 -#define OPP_STRINGLIST 0x20 -#define OPP_BINARY 0x40 -#define OPP_HEXSTRING 0x80 -#define OPP_DIRTY 0x01 -#define OPP_QUOTED 0x02 -#define OPP_NOTQUOTED 0x04 -#define OPP_ASCIIZ 0x08 - u32 flag; - u32 alloclen; - u32 len; - char *value; - char name[8]; -} openprom_property; - -static openpromfs_node *nodes; -static int alloced; -static u16 last_node; -static u16 first_prop; -static u16 options = 0xffff; -static u16 aliases = 0xffff; -static int aliases_nodes; -static char *alias_names [ALIASES_NNODES]; - -#define OPENPROM_ROOT_INO 16 -#define OPENPROM_FIRST_INO OPENPROM_ROOT_INO -#define NODE(ino) nodes[ino - OPENPROM_FIRST_INO] -#define NODE2INO(node) (node + OPENPROM_FIRST_INO) -#define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node) - -static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *); -static int openpromfs_readdir(struct file *, void *, filldir_t); -static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd); -static int openpromfs_unlink (struct inode *, struct dentry *dentry); +static DEFINE_MUTEX(op_mutex); -static inline u16 ptr_nod(void *p) -{ - return (long)p & 0xFFFF; -} +#define OPENPROM_ROOT_INO 0 -static ssize_t nodenum_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +enum op_inode_type { + op_inode_node, + op_inode_prop, +}; + +union op_inode_data { + struct device_node *node; + struct property *prop; +}; + +struct op_inode_info { + struct inode vfs_inode; + enum op_inode_type type; + union op_inode_data u; +}; + +static inline struct op_inode_info *OP_I(struct inode *inode) { - struct inode *inode = file->f_dentry->d_inode; - char buffer[10]; - - if (count < 0 || !inode->u.generic_ip) - return -EINVAL; - sprintf (buffer, "%8.8lx\n", (long)inode->u.generic_ip); - if (file->f_pos >= 9) - return 0; - if (count > 9 - file->f_pos) - count = 9 - file->f_pos; - if (copy_to_user(buf, buffer + file->f_pos, count)) - return -EFAULT; - *ppos += count; - return count; + return container_of(inode, struct op_inode_info, vfs_inode); } -static ssize_t property_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) +static int is_string(unsigned char *p, int len) { - struct inode *inode = filp->f_dentry->d_inode; - int i, j, k; - u32 node; - char *p, *s; - u32 *q; - openprom_property *op; - char buffer[64]; - - if (!filp->private_data) { - node = nodes[ptr_nod(inode->u.generic_ip)].node; - i = ((u32)(long)inode->u.generic_ip) >> 16; - if (ptr_nod(inode->u.generic_ip) == aliases) { - if (i >= aliases_nodes) - p = NULL; - else - p = alias_names [i]; - } else - for (p = prom_firstprop (node, buffer); - i && p && *p; - p = prom_nextprop (node, p, buffer), i--) - /* nothing */ ; - if (!p || !*p) - return -EIO; - i = prom_getproplen (node, p); - if (i < 0) { - if (ptr_nod(inode->u.generic_ip) == aliases) - i = 0; - else - return -EIO; - } - k = i; - if (i < 64) i = 64; - filp->private_data = kmalloc (sizeof (openprom_property) - + (j = strlen (p)) + 2 * i, - GFP_KERNEL); - if (!filp->private_data) - return -ENOMEM; - op = filp->private_data; - op->flag = 0; - op->alloclen = 2 * i; - strcpy (op->name, p); - op->value = (char *)(((unsigned long)(op->name + j + 4)) & ~3); - op->len = k; - if (k && prom_getproperty (node, p, op->value, i) < 0) - return -EIO; - op->value [k] = 0; - if (k) { - for (s = NULL, p = op->value; p < op->value + k; p++) { - if ((*p >= ' ' && *p <= '~') || *p == '\n') { - op->flag |= OPP_STRING; - s = p; - continue; - } - if (p > op->value && !*p && s == p - 1) { - if (p < op->value + k - 1) - op->flag |= OPP_STRINGLIST; - else - op->flag |= OPP_ASCIIZ; - continue; - } - if (k == 1 && !*p) { - op->flag |= (OPP_STRING|OPP_ASCIIZ); - break; - } - op->flag &= ~(OPP_STRING|OPP_STRINGLIST); - if (k & 3) - op->flag |= OPP_HEXSTRING; - else - op->flag |= OPP_BINARY; - break; - } - if (op->flag & OPP_STRINGLIST) - op->flag &= ~(OPP_STRING); - if (op->flag & OPP_ASCIIZ) - op->len--; - } - } else - op = filp->private_data; - if (!count || !(op->len || (op->flag & OPP_ASCIIZ))) - return 0; - if (*ppos >= 0xffffff || count >= 0xffffff) - return -EINVAL; - if (op->flag & OPP_STRINGLIST) { - for (k = 0, p = op->value; p < op->value + op->len; p++) - if (!*p) - k++; - i = op->len + 4 * k + 3; - } else if (op->flag & OPP_STRING) { - i = op->len + 3; - } else if (op->flag & OPP_BINARY) { - i = (op->len * 9) >> 2; - } else { - i = (op->len << 1) + 1; - } - k = *ppos; - if (k >= i) return 0; - if (count > i - k) count = i - k; - if (op->flag & OPP_STRING) { - if (!k) { - if (put_user('\'', buf)) - return -EFAULT; - k++; - count--; - } + int i; - if (k + count >= i - 2) - j = i - 2 - k; - else - j = count; - - if (j >= 0) { - if (copy_to_user(buf + k - *ppos, - op->value + k - 1, j)) - return -EFAULT; - count -= j; - k += j; - } + for (i = 0; i < len; i++) { + unsigned char val = p[i]; - if (count) { - if (put_user('\'', &buf [k++ - *ppos])) - return -EFAULT; - } - if (count > 1) { - if (put_user('\n', &buf [k++ - *ppos])) - return -EFAULT; - } - } else if (op->flag & OPP_STRINGLIST) { - char *tmp; - - tmp = kmalloc (i, GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - s = tmp; - *s++ = '\''; - for (p = op->value; p < op->value + op->len; p++) { - if (!*p) { - strcpy(s, "' + '"); - s += 5; - continue; - } - *s++ = *p; - } - strcpy(s, "'\n"); - - if (copy_to_user(buf, tmp + k, count)) - return -EFAULT; - - kfree(tmp); - k += count; - - } else if (op->flag & OPP_BINARY) { - char buffer[10]; - u32 *first, *last; - int first_off, last_cnt; - - first = ((u32 *)op->value) + k / 9; - first_off = k % 9; - last = ((u32 *)op->value) + (k + count - 1) / 9; - last_cnt = (k + count) % 9; - if (!last_cnt) last_cnt = 9; - - if (first == last) { - sprintf (buffer, "%08x.", *first); - if (copy_to_user(buf, buffer + first_off, - last_cnt - first_off)) - return -EFAULT; - buf += last_cnt - first_off; - } else { - for (q = first; q <= last; q++) { - sprintf (buffer, "%08x.", *q); - if (q == first) { - if (copy_to_user(buf, buffer + first_off, - 9 - first_off)) - return -EFAULT; - buf += 9 - first_off; - } else if (q == last) { - if (copy_to_user(buf, buffer, last_cnt)) - return -EFAULT; - buf += last_cnt; - } else { - if (copy_to_user(buf, buffer, 9)) - return -EFAULT; - buf += 9; - } - } - } + if ((i && !val) || + (val >= ' ' && val <= '~')) + continue; - if (last == (u32 *)(op->value + op->len - 4) && last_cnt == 9) { - if (put_user('\n', (buf - 1))) - return -EFAULT; - } + return 0; + } - k += count; + return 1; +} - } else if (op->flag & OPP_HEXSTRING) { - char buffer[3]; +static int property_show(struct seq_file *f, void *v) +{ + struct property *prop = f->private; + void *pval; + int len; - if ((k < i - 1) && (k & 1)) { - sprintf (buffer, "%02x", - (unsigned char) *(op->value + (k >> 1)) & 0xff); - if (put_user(buffer[1], &buf[k++ - *ppos])) - return -EFAULT; - count--; - } + len = prop->length; + pval = prop->value; - for (; (count > 1) && (k < i - 1); k += 2) { - sprintf (buffer, "%02x", - (unsigned char) *(op->value + (k >> 1)) & 0xff); - if (copy_to_user(buf + k - *ppos, buffer, 2)) - return -EFAULT; - count -= 2; - } + if (is_string(pval, len)) { + while (len > 0) { + int n = strlen(pval); - if (count && (k < i - 1)) { - sprintf (buffer, "%02x", - (unsigned char) *(op->value + (k >> 1)) & 0xff); - if (put_user(buffer[0], &buf[k++ - *ppos])) - return -EFAULT; - count--; - } + seq_printf(f, "%s", (char *) pval); - if (count) { - if (put_user('\n', &buf [k++ - *ppos])) - return -EFAULT; - } - } - count = k - *ppos; - *ppos = k; - return count; -} + /* Skip over the NULL byte too. */ + pval += n + 1; + len -= n + 1; -static ssize_t property_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - int i, j, k; - char *p; - u32 *q; - void *b; - openprom_property *op; - - if (*ppos >= 0xffffff || count >= 0xffffff) - return -EINVAL; - if (!filp->private_data) { - i = property_read (filp, NULL, 0, NULL); - if (i) - return i; - } - k = *ppos; - op = filp->private_data; - if (!(op->flag & OPP_STRING)) { - u32 *first, *last; - int first_off, last_cnt; - u32 mask, mask2; - char tmp [9]; - int forcelen = 0; - - j = k % 9; - for (i = 0; i < count; i++, j++) { - if (j == 9) j = 0; - if (!j) { - char ctmp; - if (get_user(ctmp, &buf[i])) - return -EFAULT; - if (ctmp != '.') { - if (ctmp != '\n') { - if (op->flag & OPP_BINARY) - return -EINVAL; - else - goto write_try_string; - } else { - count = i + 1; - forcelen = 1; - break; - } - } - } else { - char ctmp; - if (get_user(ctmp, &buf[i])) - return -EFAULT; - if (ctmp < '0' || - (ctmp > '9' && ctmp < 'A') || - (ctmp > 'F' && ctmp < 'a') || - ctmp > 'f') { - if (op->flag & OPP_BINARY) - return -EINVAL; - else - goto write_try_string; - } - } - } - op->flag |= OPP_BINARY; - tmp [8] = 0; - i = ((count + k + 8) / 9) << 2; - if (op->alloclen <= i) { - b = kmalloc (sizeof (openprom_property) + 2 * i, - GFP_KERNEL); - if (!b) - return -ENOMEM; - memcpy (b, filp->private_data, - sizeof (openprom_property) - + strlen (op->name) + op->alloclen); - memset (b + sizeof (openprom_property) - + strlen (op->name) + op->alloclen, - 0, 2 * i - op->alloclen); - op = b; - op->alloclen = 2*i; - b = filp->private_data; - filp->private_data = op; - kfree (b); + if (len > 0) + seq_printf(f, " + "); } - first = ((u32 *)op->value) + (k / 9); - first_off = k % 9; - last = (u32 *)(op->value + i); - last_cnt = (k + count) % 9; - if (first + 1 == last) { - memset (tmp, '0', 8); - if (copy_from_user(tmp + first_off, buf, - (count + first_off > 8) ? - 8 - first_off : count)) - return -EFAULT; - mask = 0xffffffff; - mask2 = 0xffffffff; - for (j = 0; j < first_off; j++) - mask >>= 1; - for (j = 8 - count - first_off; j > 0; j--) - mask2 <<= 1; - mask &= mask2; - if (mask) { - *first &= ~mask; - *first |= simple_strtoul (tmp, NULL, 16); - op->flag |= OPP_DIRTY; + } else { + if (len & 3) { + while (len) { + len--; + if (len) + seq_printf(f, "%02x.", + *(unsigned char *) pval); + else + seq_printf(f, "%02x", + *(unsigned char *) pval); + pval++; } } else { - op->flag |= OPP_DIRTY; - for (q = first; q < last; q++) { - if (q == first) { - if (first_off < 8) { - memset (tmp, '0', 8); - if (copy_from_user(tmp + first_off, - buf, - 8 - first_off)) - return -EFAULT; - mask = 0xffffffff; - for (j = 0; j < first_off; j++) - mask >>= 1; - *q &= ~mask; - *q |= simple_strtoul (tmp,NULL,16); - } - buf += 9; - } else if ((q == last - 1) && last_cnt - && (last_cnt < 8)) { - memset (tmp, '0', 8); - if (copy_from_user(tmp, buf, last_cnt)) - return -EFAULT; - mask = 0xffffffff; - for (j = 0; j < 8 - last_cnt; j++) - mask <<= 1; - *q &= ~mask; - *q |= simple_strtoul (tmp, NULL, 16); - buf += last_cnt; - } else { - char tchars[2 * sizeof(long) + 1]; - - if (copy_from_user(tchars, buf, sizeof(tchars) - 1)) - return -EFAULT; - tchars[sizeof(tchars) - 1] = '\0'; - *q = simple_strtoul (tchars, NULL, 16); - buf += 9; - } - } - } - if (!forcelen) { - if (op->len < i) - op->len = i; - } else - op->len = i; - *ppos += count; - } -write_try_string: - if (!(op->flag & OPP_BINARY)) { - if (!(op->flag & (OPP_QUOTED | OPP_NOTQUOTED))) { - char ctmp; - - /* No way, if somebody starts writing from the middle, - * we don't know whether he uses quotes around or not - */ - if (k > 0) - return -EINVAL; - if (get_user(ctmp, buf)) - return -EFAULT; - if (ctmp == '\'') { - op->flag |= OPP_QUOTED; - buf++; - count--; - (*ppos)++; - if (!count) { - op->flag |= OPP_STRING; - return 1; - } - } else - op->flag |= OPP_NOTQUOTED; - } - op->flag |= OPP_STRING; - if (op->alloclen <= count + *ppos) { - b = kmalloc (sizeof (openprom_property) - + 2 * (count + *ppos), GFP_KERNEL); - if (!b) - return -ENOMEM; - memcpy (b, filp->private_data, - sizeof (openprom_property) - + strlen (op->name) + op->alloclen); - memset (b + sizeof (openprom_property) - + strlen (op->name) + op->alloclen, - 0, 2*(count - *ppos) - op->alloclen); - op = b; - op->alloclen = 2*(count + *ppos); - b = filp->private_data; - filp->private_data = op; - kfree (b); - } - p = op->value + *ppos - ((op->flag & OPP_QUOTED) ? 1 : 0); - if (copy_from_user(p, buf, count)) - return -EFAULT; - op->flag |= OPP_DIRTY; - for (i = 0; i < count; i++, p++) - if (*p == '\n') { - *p = 0; - break; + while (len >= 4) { + len -= 4; + + if (len) + seq_printf(f, "%08x.", + *(unsigned int *) pval); + else + seq_printf(f, "%08x", + *(unsigned int *) pval); + pval += 4; } - if (i < count) { - op->len = p - op->value; - *ppos += i + 1; - if ((p > op->value) && (op->flag & OPP_QUOTED) - && (*(p - 1) == '\'')) - op->len--; - } else { - if (p - op->value > op->len) - op->len = p - op->value; - *ppos += count; } } - return *ppos - k; + seq_printf(f, "\n"); + + return 0; } -int property_release (struct inode *inode, struct file *filp) +static void *property_start(struct seq_file *f, loff_t *pos) { - openprom_property *op = filp->private_data; - int error; - u32 node; - - if (!op) - return 0; - lock_kernel(); - node = nodes[ptr_nod(inode->u.generic_ip)].node; - if (ptr_nod(inode->u.generic_ip) == aliases) { - if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { - char *p = op->name; - int i = (op->value - op->name) - strlen (op->name) - 1; - op->value [op->len] = 0; - *(op->value - 1) = ' '; - if (i) { - for (p = op->value - i - 2; p >= op->name; p--) - p[i] = *p; - p = op->name + i; - } - memcpy (p - 8, "nvalias ", 8); - prom_feval (p - 8); - } - } else if (op->flag & OPP_DIRTY) { - if (op->flag & OPP_STRING) { - op->value [op->len] = 0; - error = prom_setprop (node, op->name, - op->value, op->len + 1); - if (error <= 0) - printk (KERN_WARNING "openpromfs: " - "Couldn't write property %s\n", - op->name); - } else if ((op->flag & OPP_BINARY) || !op->len) { - error = prom_setprop (node, op->name, - op->value, op->len); - if (error <= 0) - printk (KERN_WARNING "openpromfs: " - "Couldn't write property %s\n", - op->name); - } else { - printk (KERN_WARNING "openpromfs: " - "Unknown property type of %s\n", - op->name); - } + if (*pos == 0) + return pos; + return NULL; +} + +static void *property_next(struct seq_file *f, void *v, loff_t *pos) +{ + (*pos)++; + return NULL; +} + +static void property_stop(struct seq_file *f, void *v) +{ + /* Nothing to do */ +} + +static struct seq_operations property_op = { + .start = property_start, + .next = property_next, + .stop = property_stop, + .show = property_show +}; + +static int property_open(struct inode *inode, struct file *file) +{ + struct op_inode_info *oi = OP_I(inode); + int ret; + + BUG_ON(oi->type != op_inode_prop); + + ret = seq_open(file, &property_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = oi->u.prop; } - unlock_kernel(); - kfree (filp->private_data); - return 0; + return ret; } static const struct file_operations openpromfs_prop_ops = { - .read = property_read, - .write = property_write, - .release = property_release, + .open = property_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; -static const struct file_operations openpromfs_nodenum_ops = { - .read = nodenum_read, -}; +static int openpromfs_readdir(struct file *, void *, filldir_t); static const struct file_operations openprom_operations = { .read = generic_read_dir, .readdir = openpromfs_readdir, }; -static struct inode_operations openprom_alias_inode_operations = { - .create = openpromfs_create, - .lookup = openpromfs_lookup, - .unlink = openpromfs_unlink, -}; +static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); static struct inode_operations openprom_inode_operations = { .lookup = openpromfs_lookup, }; -static int lookup_children(u16 n, const char * name, int len) +static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - int ret; - u16 node; - for (; n != 0xffff; n = nodes[n].next) { - node = nodes[n].child; - if (node != 0xffff) { - char buffer[128]; - int i; - char *p; - - while (node != 0xffff) { - if (prom_getname (nodes[node].node, - buffer, 128) >= 0) { - i = strlen (buffer); - if ((len == i) - && !strncmp (buffer, name, len)) - return NODE2INO(node); - p = strchr (buffer, '@'); - if (p && (len == p - buffer) - && !strncmp (buffer, name, len)) - return NODE2INO(node); - } - node = nodes[node].next; - } - } else - continue; - ret = lookup_children (nodes[n].child, name, len); - if (ret) return ret; - } - return 0; -} - -static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) -{ - int ino = 0; -#define OPFSL_DIR 0 -#define OPFSL_PROPERTY 1 -#define OPFSL_NODENUM 2 - int type = 0; - char buffer[128]; - char *p; + struct op_inode_info *ent_oi, *oi = OP_I(dir); + struct device_node *dp, *child; + struct property *prop; + enum op_inode_type ent_type; + union op_inode_data ent_data; const char *name; - u32 n; - u16 dirnode; - unsigned int len; - int i; struct inode *inode; - char buffer2[64]; + unsigned int ino; + int len; - inode = NULL; + BUG_ON(oi->type != op_inode_node); + + dp = oi->u.node; + name = dentry->d_name.name; len = dentry->d_name.len; - lock_kernel(); - if (name [0] == '.' && len == 5 && !strncmp (name + 1, "node", 4)) { - ino = NODEP2INO(NODE(dir->i_ino).first_prop); - type = OPFSL_NODENUM; - } - if (!ino) { - u16 node = NODE(dir->i_ino).child; - while (node != 0xffff) { - if (prom_getname (nodes[node].node, buffer, 128) >= 0) { - i = strlen (buffer); - if (len == i && !strncmp (buffer, name, len)) { - ino = NODE2INO(node); - type = OPFSL_DIR; - break; - } - p = strchr (buffer, '@'); - if (p && (len == p - buffer) - && !strncmp (buffer, name, len)) { - ino = NODE2INO(node); - type = OPFSL_DIR; - break; - } - } - node = nodes[node].next; - } - } - n = NODE(dir->i_ino).node; - dirnode = dir->i_ino - OPENPROM_FIRST_INO; - if (!ino) { - int j = NODEP2INO(NODE(dir->i_ino).first_prop); - if (dirnode != aliases) { - for (p = prom_firstprop (n, buffer2); - p && *p; - p = prom_nextprop (n, p, buffer2)) { - j++; - if ((len == strlen (p)) - && !strncmp (p, name, len)) { - ino = j; - type = OPFSL_PROPERTY; - break; - } - } - } else { - int k; - for (k = 0; k < aliases_nodes; k++) { - j++; - if (alias_names [k] - && (len == strlen (alias_names [k])) - && !strncmp (alias_names [k], name, len)) { - ino = j; - type = OPFSL_PROPERTY; - break; - } - } + + mutex_lock(&op_mutex); + + child = dp->child; + while (child) { + int n = strlen(child->path_component_name); + + if (len == n && + !strncmp(child->path_component_name, name, len)) { + ent_type = op_inode_node; + ent_data.node = child; + ino = child->unique_id; + goto found; } + child = child->sibling; } - if (!ino) { - ino = lookup_children (NODE(dir->i_ino).child, name, len); - if (ino) - type = OPFSL_DIR; - else { - unlock_kernel(); - return ERR_PTR(-ENOENT); + + prop = dp->properties; + while (prop) { + int n = strlen(prop->name); + + if (len == n && !strncmp(prop->name, name, len)) { + ent_type = op_inode_prop; + ent_data.prop = prop; + ino = prop->unique_id; + goto found; } + + prop = prop->next; } - inode = iget (dir->i_sb, ino); - unlock_kernel(); + + mutex_unlock(&op_mutex); + return ERR_PTR(-ENOENT); + +found: + inode = iget(dir->i_sb, ino); + mutex_unlock(&op_mutex); if (!inode) return ERR_PTR(-EINVAL); - switch (type) { - case OPFSL_DIR: + ent_oi = OP_I(inode); + ent_oi->type = ent_type; + ent_oi->u = ent_data; + + switch (ent_type) { + case op_inode_node: inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - if (ino == OPENPROM_FIRST_INO + aliases) { - inode->i_mode |= S_IWUSR; - inode->i_op = &openprom_alias_inode_operations; - } else - inode->i_op = &openprom_inode_operations; + inode->i_op = &openprom_inode_operations; inode->i_fop = &openprom_operations; inode->i_nlink = 2; break; - case OPFSL_NODENUM: - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = &openpromfs_nodenum_ops; - inode->i_nlink = 1; - inode->u.generic_ip = (void *)(long)(n); - break; - case OPFSL_PROPERTY: - if ((dirnode == options) && (len == 17) - && !strncmp (name, "security-password", 17)) + case op_inode_prop: + if (!strcmp(dp->name, "options") && (len == 17) && + !strncmp (name, "security-password", 17)) inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; - else { + else inode->i_mode = S_IFREG | S_IRUGO; - if (dirnode == options || dirnode == aliases) { - if (len != 4 || strncmp (name, "name", 4)) - inode->i_mode |= S_IWUSR; - } - } inode->i_fop = &openpromfs_prop_ops; inode->i_nlink = 1; - if (inode->i_size < 0) - inode->i_size = 0; - inode->u.generic_ip = (void *)(long)(((u16)dirnode) | - (((u16)(ino - NODEP2INO(NODE(dir->i_ino).first_prop) - 1)) << 16)); + inode->i_size = ent_oi->u.prop->length; break; } @@ -781,237 +263,89 @@ static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentr static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct inode *inode = filp->f_dentry->d_inode; + struct op_inode_info *oi = OP_I(inode); + struct device_node *dp = oi->u.node; + struct device_node *child; + struct property *prop; unsigned int ino; - u32 n; - int i, j; - char buffer[128]; - u16 node; - char *p; - char buffer2[64]; - - lock_kernel(); + int i; + + mutex_lock(&op_mutex); ino = inode->i_ino; i = filp->f_pos; switch (i) { case 0: - if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) goto out; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + goto out; i++; filp->f_pos++; /* fall thru */ case 1: - if (filldir(dirent, "..", 2, i, - (NODE(ino).parent == 0xffff) ? - OPENPROM_ROOT_INO : NODE2INO(NODE(ino).parent), DT_DIR) < 0) + if (filldir(dirent, "..", 2, i, + (dp->parent == NULL ? + OPENPROM_ROOT_INO : + dp->parent->unique_id), DT_DIR) < 0) goto out; i++; filp->f_pos++; /* fall thru */ default: i -= 2; - node = NODE(ino).child; - while (i && node != 0xffff) { - node = nodes[node].next; + + /* First, the children nodes as directories. */ + child = dp->child; + while (i && child) { + child = child->sibling; i--; } - while (node != 0xffff) { - if (prom_getname (nodes[node].node, buffer, 128) < 0) - goto out; - if (filldir(dirent, buffer, strlen(buffer), - filp->f_pos, NODE2INO(node), DT_DIR) < 0) + while (child) { + if (filldir(dirent, + child->path_component_name, + strlen(child->path_component_name), + filp->f_pos, child->unique_id, DT_DIR) < 0) goto out; + filp->f_pos++; - node = nodes[node].next; + child = child->sibling; } - j = NODEP2INO(NODE(ino).first_prop); - if (!i) { - if (filldir(dirent, ".node", 5, filp->f_pos, j, DT_REG) < 0) + + /* Next, the properties as files. */ + prop = dp->properties; + while (i && prop) { + prop = prop->next; + i--; + } + while (prop) { + if (filldir(dirent, prop->name, strlen(prop->name), + filp->f_pos, prop->unique_id, DT_REG) < 0) goto out; + filp->f_pos++; - } else - i--; - n = NODE(ino).node; - if (ino == OPENPROM_FIRST_INO + aliases) { - for (j++; i < aliases_nodes; i++, j++) { - if (alias_names [i]) { - if (filldir (dirent, alias_names [i], - strlen (alias_names [i]), - filp->f_pos, j, DT_REG) < 0) goto out; - filp->f_pos++; - } - } - } else { - for (p = prom_firstprop (n, buffer2); - p && *p; - p = prom_nextprop (n, p, buffer2)) { - j++; - if (i) i--; - else { - if (filldir(dirent, p, strlen(p), - filp->f_pos, j, DT_REG) < 0) - goto out; - filp->f_pos++; - } - } + prop = prop->next; } } out: - unlock_kernel(); - return 0; -} - -static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - char *p; - struct inode *inode; - - if (!dir) - return -ENOENT; - if (dentry->d_name.len > 256) - return -EINVAL; - p = kmalloc (dentry->d_name.len + 1, GFP_KERNEL); - if (!p) - return -ENOMEM; - strncpy (p, dentry->d_name.name, dentry->d_name.len); - p [dentry->d_name.len] = 0; - lock_kernel(); - if (aliases_nodes == ALIASES_NNODES) { - kfree(p); - unlock_kernel(); - return -EIO; - } - alias_names [aliases_nodes++] = p; - inode = iget (dir->i_sb, - NODEP2INO(NODE(dir->i_ino).first_prop) + aliases_nodes); - if (!inode) { - unlock_kernel(); - return -EINVAL; - } - inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR; - inode->i_fop = &openpromfs_prop_ops; - inode->i_nlink = 1; - if (inode->i_size < 0) inode->i_size = 0; - inode->u.generic_ip = (void *)(long)(((u16)aliases) | - (((u16)(aliases_nodes - 1)) << 16)); - unlock_kernel(); - d_instantiate(dentry, inode); + mutex_unlock(&op_mutex); return 0; } -static int openpromfs_unlink (struct inode *dir, struct dentry *dentry) -{ - unsigned int len; - char *p; - const char *name; - int i; - - name = dentry->d_name.name; - len = dentry->d_name.len; - lock_kernel(); - for (i = 0; i < aliases_nodes; i++) - if ((strlen (alias_names [i]) == len) - && !strncmp (name, alias_names[i], len)) { - char buffer[512]; - - p = alias_names [i]; - alias_names [i] = NULL; - kfree (p); - strcpy (buffer, "nvunalias "); - memcpy (buffer + 10, name, len); - buffer [10 + len] = 0; - prom_feval (buffer); - } - unlock_kernel(); - return 0; -} +static kmem_cache_t *op_inode_cachep; -/* {{{ init section */ -static int __init check_space (u16 n) +static struct inode *openprom_alloc_inode(struct super_block *sb) { - unsigned long pages; + struct op_inode_info *oi; - if ((1 << alloced) * PAGE_SIZE < (n + 2) * sizeof(openpromfs_node)) { - pages = __get_free_pages (GFP_KERNEL, alloced + 1); - if (!pages) - return -1; + oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL); + if (!oi) + return NULL; - if (nodes) { - memcpy ((char *)pages, nodes, - (1 << alloced) * PAGE_SIZE); - free_pages ((unsigned long)nodes, alloced); - } - alloced++; - nodes = (openpromfs_node *)pages; - } - return 0; + return &oi->vfs_inode; } -static u16 __init get_nodes (u16 parent, u32 node) +static void openprom_destroy_inode(struct inode *inode) { - char *p; - u16 n = last_node++, i; - char buffer[64]; - - if (check_space (n) < 0) - return 0xffff; - nodes[n].parent = parent; - nodes[n].node = node; - nodes[n].next = 0xffff; - nodes[n].child = 0xffff; - nodes[n].first_prop = first_prop++; - if (!parent) { - char buffer[8]; - int j; - - if ((j = prom_getproperty (node, "name", buffer, 8)) >= 0) { - buffer[j] = 0; - if (!strcmp (buffer, "options")) - options = n; - else if (!strcmp (buffer, "aliases")) - aliases = n; - } - } - if (n != aliases) - for (p = prom_firstprop (node, buffer); - p && p != (char *)-1 && *p; - p = prom_nextprop (node, p, buffer)) - first_prop++; - else { - char *q; - for (p = prom_firstprop (node, buffer); - p && p != (char *)-1 && *p; - p = prom_nextprop (node, p, buffer)) { - if (aliases_nodes == ALIASES_NNODES) - break; - for (i = 0; i < aliases_nodes; i++) - if (!strcmp (p, alias_names [i])) - break; - if (i < aliases_nodes) - continue; - q = kmalloc (strlen (p) + 1, GFP_KERNEL); - if (!q) - return 0xffff; - strcpy (q, p); - alias_names [aliases_nodes++] = q; - } - first_prop += ALIASES_NNODES; - } - node = prom_getchild (node); - if (node) { - parent = get_nodes (n, node); - if (parent == 0xffff) - return 0xffff; - nodes[n].child = parent; - while ((node = prom_getsibling (node)) != 0) { - i = get_nodes (n, node); - if (i == 0xffff) - return 0xffff; - nodes[parent].next = i; - parent = i; - } - } - return n; + kmem_cache_free(op_inode_cachep, OP_I(inode)); } static void openprom_read_inode(struct inode * inode) @@ -1031,6 +365,8 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data) } static struct super_operations openprom_sops = { + .alloc_inode = openprom_alloc_inode, + .destroy_inode = openprom_destroy_inode, .read_inode = openprom_read_inode, .statfs = simple_statfs, .remount_fs = openprom_remount, @@ -1038,7 +374,8 @@ static struct super_operations openprom_sops = { static int openprom_fill_super(struct super_block *s, void *data, int silent) { - struct inode * root_inode; + struct inode *root_inode; + struct op_inode_info *oi; s->s_flags |= MS_NOATIME; s->s_blocksize = 1024; @@ -1049,6 +386,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) root_inode = iget(s, OPENPROM_ROOT_INO); if (!root_inode) goto out_no_root; + + oi = OP_I(root_inode); + oi->type = op_inode_node; + oi->u.node = of_find_node_by_path("/"); + s->s_root = d_alloc_root(root_inode); if (!s->s_root) goto out_no_root; @@ -1073,29 +415,39 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; +static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags) +{ + struct op_inode_info *oi = (struct op_inode_info *) data; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&oi->vfs_inode); +} + static int __init init_openprom_fs(void) { - nodes = (openpromfs_node *)__get_free_pages(GFP_KERNEL, 0); - if (!nodes) { - printk (KERN_WARNING "openpromfs: can't get free page\n"); - return -EIO; - } - if (get_nodes (0xffff, prom_root_node) == 0xffff) { - printk (KERN_WARNING "openpromfs: couldn't setup tree\n"); - return -EIO; - } - nodes[last_node].first_prop = first_prop; - return register_filesystem(&openprom_fs_type); + int err; + + op_inode_cachep = kmem_cache_create("op_inode_cache", + sizeof(struct op_inode_info), + 0, + (SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), + op_inode_init_once, NULL); + if (!op_inode_cachep) + return -ENOMEM; + + err = register_filesystem(&openprom_fs_type); + if (err) + kmem_cache_destroy(op_inode_cachep); + + return err; } static void __exit exit_openprom_fs(void) { - int i; unregister_filesystem(&openprom_fs_type); - free_pages ((unsigned long)nodes, alloced); - for (i = 0; i < aliases_nodes; i++) - kfree (alias_names [i]); - nodes = NULL; + kmem_cache_destroy(op_inode_cachep); } module_init(init_openprom_fs) diff --git a/fs/pnode.c b/fs/pnode.c index 37b568ed0e0..da42ee61c1d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -53,8 +53,7 @@ static int do_make_slave(struct vfsmount *mnt) if (master) { list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) slave_mnt->mnt_master = master; - list_del(&mnt->mnt_slave); - list_add(&mnt->mnt_slave, &master->mnt_slave_list); + list_move(&mnt->mnt_slave, &master->mnt_slave_list); list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); INIT_LIST_HEAD(&mnt->mnt_slave_list); } else { @@ -283,10 +282,8 @@ static void __propagate_umount(struct vfsmount *mnt) * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) { - list_del(&child->mnt_hash); - list_add_tail(&child->mnt_hash, &mnt->mnt_hash); - } + if (child && list_empty(&child->mnt_mounts)) + list_move_tail(&child->mnt_hash, &mnt->mnt_hash); } } diff --git a/fs/proc/base.c b/fs/proc/base.c index 6afff725a8c..6ba7785319d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -74,6 +74,16 @@ #include <linux/poll.h> #include "internal.h" +/* NOTE: + * Implementing inode permission operations in /proc is almost + * certainly an error. Permission checks need to happen during + * each system call not at open time. The reason is that most of + * what we wish to check for permissions in /proc varies at runtime. + * + * The classic example of a problem is opening file descriptors + * in /proc for a task before it execs a suid executable. + */ + /* * For hysterical raisins we keep the same inumbers as in the old procfs. * Feel free to change the macro below - just keep the range distinct from @@ -121,6 +131,8 @@ enum pid_directory_inos { PROC_TGID_ATTR_PREV, PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, + PROC_TGID_ATTR_KEYCREATE, + PROC_TGID_ATTR_SOCKCREATE, #endif #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, @@ -162,6 +174,8 @@ enum pid_directory_inos { PROC_TID_ATTR_PREV, PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, + PROC_TID_ATTR_KEYCREATE, + PROC_TID_ATTR_SOCKCREATE, #endif #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, @@ -173,6 +187,9 @@ enum pid_directory_inos { PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; +/* Worst case buffer size needed for holding an integer. */ +#define PROC_NUMBUF 10 + struct pid_entry { int type; int len; @@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = { E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), {0,0,NULL,0} }; static struct pid_entry tid_attr_stuff[] = { @@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = { E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), {0,0,NULL,0} }; #endif @@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = { static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct task_struct *task = proc_task(inode); - struct files_struct *files; + struct task_struct *task = get_proc_task(inode); + struct files_struct *files = NULL; struct file *file; - int fd = proc_type(inode) - PROC_TID_FD_DIR; + int fd = proc_fd(inode); - files = get_files_struct(task); + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } if (files) { /* * We are not taking a ref to the file structure, so we must @@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) return fs; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int get_nr_threads(struct task_struct *tsk) { - struct fs_struct *fs = get_fs_struct(proc_task(inode)); - int result = -ENOENT; - if (fs) { - read_lock(&fs->lock); - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); - read_unlock(&fs->lock); - result = 0; - put_fs_struct(fs); + /* Must be called with the rcu_read_lock held */ + unsigned long flags; + int count = 0; + + if (lock_task_sighand(tsk, &flags)) { + count = atomic_read(&tsk->signal->count); + unlock_task_sighand(tsk, &flags); } - return result; + return count; } -static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct fs_struct *fs = get_fs_struct(proc_task(inode)); + struct task_struct *task = get_proc_task(inode); + struct fs_struct *fs = NULL; int result = -ENOENT; + + if (task) { + fs = get_fs_struct(task); + put_task_struct(task); + } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } - -/* Same as proc_root_link, but this addionally tries to get fs from other - * threads in the group */ -static int proc_task_root_link(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt) +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct fs_struct *fs; + struct task_struct *task = get_proc_task(inode); + struct fs_struct *fs = NULL; int result = -ENOENT; - struct task_struct *leader = proc_task(inode); - task_lock(leader); - fs = leader->fs; - if (fs) { - atomic_inc(&fs->count); - task_unlock(leader); - } else { - /* Try to get fs from other threads */ - task_unlock(leader); - read_lock(&tasklist_lock); - if (pid_alive(leader)) { - struct task_struct *task = leader; - - while ((task = next_thread(task)) != leader) { - task_lock(task); - fs = task->fs; - if (fs) { - atomic_inc(&fs->count); - task_unlock(task); - break; - } - task_unlock(task); - } - } - read_unlock(&tasklist_lock); + if (task) { + fs = get_fs_struct(task); + put_task_struct(task); } - if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->rootmnt); @@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry, return result; } - #define MAY_PTRACE(task) \ (task == current || \ (task->parent == current && \ @@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer) /************************************************************************/ /* permission checks */ - -/* If the process being read is separated by chroot from the reading process, - * don't let the reader access the threads. - * - * note: this does dput(root) and mntput(vfsmnt) on exit. - */ -static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) -{ - struct dentry *de, *base; - struct vfsmount *our_vfsmnt, *mnt; - int res = 0; - - read_lock(¤t->fs->lock); - our_vfsmnt = mntget(current->fs->rootmnt); - base = dget(current->fs->root); - read_unlock(¤t->fs->lock); - - spin_lock(&vfsmount_lock); - de = root; - mnt = vfsmnt; - - while (mnt != our_vfsmnt) { - if (mnt == mnt->mnt_parent) - goto out; - de = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; - } - - if (!is_subdir(de, base)) - goto out; - spin_unlock(&vfsmount_lock); - -exit: - dput(base); - mntput(our_vfsmnt); - dput(root); - mntput(vfsmnt); - return res; -out: - spin_unlock(&vfsmount_lock); - res = -EACCES; - goto exit; -} - -static int proc_check_root(struct inode *inode) -{ - struct dentry *root; - struct vfsmount *vfsmnt; - - if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ - return -ENOENT; - return proc_check_chroot(root, vfsmnt); -} - -static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - if (generic_permission(inode, mask, NULL) != 0) - return -EACCES; - return proc_check_root(inode); -} - -static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - struct dentry *root; - struct vfsmount *vfsmnt; - - if (generic_permission(inode, mask, NULL) != 0) - return -EACCES; - - if (proc_task_root_link(inode, &root, &vfsmnt)) - return -ENOENT; - - return proc_check_chroot(root, vfsmnt); -} - -extern struct seq_operations proc_pid_maps_op; -static int maps_open(struct inode *inode, struct file *file) -{ - struct task_struct *task = proc_task(inode); - int ret = seq_open(file, &proc_pid_maps_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = task; - } - return ret; -} - -static struct file_operations proc_maps_operations = { - .open = maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#ifdef CONFIG_NUMA -extern struct seq_operations proc_pid_numa_maps_op; -static int numa_maps_open(struct inode *inode, struct file *file) -{ - struct task_struct *task = proc_task(inode); - int ret = seq_open(file, &proc_pid_numa_maps_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = task; - } - return ret; -} - -static struct file_operations proc_numa_maps_operations = { - .open = numa_maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - -#ifdef CONFIG_MMU -extern struct seq_operations proc_pid_smaps_op; -static int smaps_open(struct inode *inode, struct file *file) +static int proc_fd_access_allowed(struct inode *inode) { - struct task_struct *task = proc_task(inode); - int ret = seq_open(file, &proc_pid_smaps_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = task; + struct task_struct *task; + int allowed = 0; + /* Allow access to a task's file descriptors if it is us or we + * may use ptrace attach to the process and find out that + * information. + */ + task = get_proc_task(inode); + if (task) { + allowed = ptrace_may_attach(task); + put_task_struct(task); } - return ret; + return allowed; } -static struct file_operations proc_smaps_operations = { - .open = smaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - extern struct seq_operations mounts_op; struct proc_mounts { struct seq_file m; @@ -679,16 +560,19 @@ struct proc_mounts { static int mounts_open(struct inode *inode, struct file *file) { - struct task_struct *task = proc_task(inode); - struct namespace *namespace; + struct task_struct *task = get_proc_task(inode); + struct namespace *namespace = NULL; struct proc_mounts *p; int ret = -EINVAL; - task_lock(task); - namespace = task->namespace; - if (namespace) - get_namespace(namespace); - task_unlock(task); + if (task) { + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + put_task_struct(task); + } if (namespace) { ret = -ENOMEM; @@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = { extern struct seq_operations mountstats_op; static int mountstats_open(struct inode *inode, struct file *file) { - struct task_struct *task = proc_task(inode); int ret = seq_open(file, &mountstats_op); if (!ret) { struct seq_file *m = file->private_data; - struct namespace *namespace; - task_lock(task); - namespace = task->namespace; - if (namespace) - get_namespace(namespace); - task_unlock(task); + struct namespace *namespace = NULL; + struct task_struct *task = get_proc_task(inode); + + if (task) { + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + put_task_struct(task); + } if (namespace) m->private = namespace; @@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, struct inode * inode = file->f_dentry->d_inode; unsigned long page; ssize_t length; - struct task_struct *task = proc_task(inode); + struct task_struct *task = get_proc_task(inode); + + length = -ESRCH; + if (!task) + goto out_no_task; if (count > PROC_BLOCK_SIZE) count = PROC_BLOCK_SIZE; + + length = -ENOMEM; if (!(page = __get_free_page(GFP_KERNEL))) - return -ENOMEM; + goto out; length = PROC_I(inode)->op.proc_read(task, (char*)page); if (length >= 0) length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); free_page(page); +out: + put_task_struct(task); +out_no_task: return length; } @@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file) static ssize_t mem_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct task_struct *task = proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); char *page; unsigned long src = *ppos; int ret = -ESRCH; struct mm_struct *mm; + if (!task) + goto out_no_task; + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) goto out; @@ -865,6 +765,8 @@ out_put: out_free: free_page((unsigned long) page); out: + put_task_struct(task); +out_no_task: return ret; } @@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf, { int copied = 0; char *page; - struct task_struct *task = proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; + copied = -ESRCH; + if (!task) + goto out_no_task; + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) - return -ESRCH; + goto out; + copied = -ENOMEM; page = (char *)__get_free_page(GFP_USER); if (!page) - return -ENOMEM; + goto out; while (count > 0) { int this_len, retval; @@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf, } *ppos = dst; free_page((unsigned long) page); +out: + put_task_struct(task); +out_no_task: return copied; } #endif @@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = { static ssize_t oom_adjust_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = proc_task(file->f_dentry->d_inode); - char buffer[8]; + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + char buffer[PROC_NUMBUF]; size_t len; - int oom_adjust = task->oomkilladj; + int oom_adjust; loff_t __ppos = *ppos; - len = sprintf(buffer, "%i\n", oom_adjust); + if (!task) + return -ESRCH; + oom_adjust = task->oomkilladj; + put_task_struct(task); + + len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); if (__ppos >= len) return 0; if (count > len-__ppos) @@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, static ssize_t oom_adjust_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = proc_task(file->f_dentry->d_inode); - char buffer[8], *end; + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; int oom_adjust; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - memset(buffer, 0, 8); - if (count > 6) - count = 6; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; oom_adjust = simple_strtol(buffer, &end, 0); @@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, return -EINVAL; if (*end == '\n') end++; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + return -ESRCH; task->oomkilladj = oom_adjust; + put_task_struct(task); if (end - buffer == 0) return -EIO; return end - buffer; @@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; -static struct inode_operations proc_mem_inode_operations = { - .permission = proc_permission, -}; - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; - struct task_struct *task = proc_task(inode); + struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; + if (!task) + return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", audit_get_loginuid(task->audit_context)); + put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1010,13 +928,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, struct inode * inode = file->f_dentry->d_inode; char *page, *tmp; ssize_t length; - struct task_struct *task = proc_task(inode); uid_t loginuid; if (!capable(CAP_AUDIT_CONTROL)) return -EPERM; - if (current != task) + if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) return -EPERM; if (count >= PAGE_SIZE) @@ -1040,7 +957,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - length = audit_set_loginuid(task, loginuid); + length = audit_set_loginuid(current, loginuid); if (likely(length == 0)) length = count; @@ -1059,13 +976,16 @@ static struct file_operations proc_loginuid_operations = { static ssize_t seccomp_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); char __buf[20]; loff_t __ppos = *ppos; size_t len; + if (!tsk) + return -ESRCH; /* no need to print the trailing zero, so use only len */ len = sprintf(__buf, "%u\n", tsk->seccomp.mode); + put_task_struct(tsk); if (__ppos >= len) return 0; if (count > len - __ppos) @@ -1079,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, static ssize_t seccomp_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *tsk = proc_task(file->f_dentry->d_inode); + struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); char __buf[20], *end; unsigned int seccomp_mode; + ssize_t result; + + result = -ESRCH; + if (!tsk) + goto out_no_task; /* can set it only once to be even more secure */ + result = -EPERM; if (unlikely(tsk->seccomp.mode)) - return -EPERM; + goto out; + result = -EFAULT; memset(__buf, 0, sizeof(__buf)); count = min(count, sizeof(__buf) - 1); if (copy_from_user(__buf, buf, count)) - return -EFAULT; + goto out; + seccomp_mode = simple_strtoul(__buf, &end, 0); if (*end == '\n') end++; + result = -EINVAL; if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { tsk->seccomp.mode = seccomp_mode; set_tsk_thread_flag(tsk, TIF_SECCOMP); } else - return -EINVAL; + goto out; + result = -EIO; if (unlikely(!(end - __buf))) - return -EIO; - return end - __buf; + goto out; + result = end - __buf; +out: + put_task_struct(tsk); +out_no_task: + return result; } static struct file_operations proc_seccomp_operations = { @@ -1118,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) /* We don't need a base pointer in the /proc filesystem */ path_release(nd); - if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) - goto out; - error = proc_check_root(inode); - if (error) + /* Are we allowed to snoop on the tasks file descriptors? */ + if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); @@ -1163,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b struct dentry *de; struct vfsmount *mnt = NULL; - lock_kernel(); - - if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) - goto out; - error = proc_check_root(inode); - if (error) + /* Are we allowed to snoop on the tasks file descriptors? */ + if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); @@ -1179,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b dput(de); mntput(mnt); out: - unlock_kernel(); return error; } @@ -1188,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = { .follow_link = proc_pid_follow_link }; -#define NUMBUF 10 - static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; - struct task_struct *p = proc_task(inode); + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *p = get_proc_task(inode); unsigned int fd, tid, ino; int retval; - char buf[NUMBUF]; + char buf[PROC_NUMBUF]; struct files_struct * files; struct fdtable *fdt; retval = -ENOENT; - if (!pid_alive(p)) - goto out; + if (!p) + goto out_no_task; retval = 0; tid = p->pid; @@ -1213,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) goto out; filp->f_pos++; case 1: - ino = fake_ino(tid, PROC_TID_INO); + ino = parent_ino(dentry); if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) goto out; filp->f_pos++; @@ -1232,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) continue; rcu_read_unlock(); - j = NUMBUF; + j = PROC_NUMBUF; i = fd; do { j--; @@ -1241,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) } while (i); ino = fake_ino(tid, PROC_TID_FD_DIR + fd); - if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { + if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { rcu_read_lock(); break; } @@ -1251,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) put_files_struct(files); } out: + put_task_struct(p); +out_no_task: return retval; } @@ -1262,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp, int pid; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); struct pid_entry *p; ino_t ino; int ret; ret = -ENOENT; - if (!pid_alive(proc_task(inode))) + if (!task) goto out; ret = 0; - pid = proc_task(inode)->pid; + pid = task->pid; + put_task_struct(task); i = filp->f_pos; switch (i) { case 0: @@ -1354,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st /* Common stuff */ ei = PROC_I(inode); - ei->task = NULL; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(task->pid, ino); - if (!pid_alive(task)) - goto out_unlock; - /* * grab the reference to task. */ - get_task_struct(task); - ei->task = task; - ei->type = ino; + ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); + if (!ei->pid) + goto out_unlock; + inode->i_uid = 0; inode->i_gid = 0; - if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { + if (task_dumpable(task)) { inode->i_uid = task->euid; inode->i_gid = task->egid; } @@ -1379,7 +1306,6 @@ out: return inode; out_unlock: - ei->pde = NULL; iput(inode); return NULL; } @@ -1393,13 +1319,21 @@ out_unlock: * * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. + * + * Before the /proc/pid/status file was created the only way to read + * the effective uid of a /process was to stat /proc/pid. Reading + * /proc/pid/status is slow enough that procps and other packages + * kept stating /proc/pid. To keep the rules in /proc simple I have + * made this apply to all per process world readable and executable + * directories. */ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - struct task_struct *task = proc_task(inode); - if (pid_alive(task)) { - if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { + struct task_struct *task = get_proc_task(inode); + if (task) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || + task_dumpable(task)) { inode->i_uid = task->euid; inode->i_gid = task->egid; } else { @@ -1407,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = 0; } security_task_to_inode(task, inode); + put_task_struct(task); return 1; } d_drop(dentry); return 0; } +static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *task; + generic_fillattr(inode, stat); + + rcu_read_lock(); + stat->uid = 0; + stat->gid = 0; + task = pid_task(proc_pid(inode), PIDTYPE_PID); + if (task) { + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || + task_dumpable(task)) { + stat->uid = task->euid; + stat->gid = task->egid; + } + } + rcu_read_unlock(); + return 0; +} + static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - struct task_struct *task = proc_task(inode); - int fd = proc_type(inode) - PROC_TID_FD_DIR; + struct task_struct *task = get_proc_task(inode); + int fd = proc_fd(inode); struct files_struct *files; - files = get_files_struct(task); - if (files) { - rcu_read_lock(); - if (fcheck_files(files, fd)) { + if (task) { + files = get_files_struct(task); + if (files) { + rcu_read_lock(); + if (fcheck_files(files, fd)) { + rcu_read_unlock(); + put_files_struct(files); + if (task_dumpable(task)) { + inode->i_uid = task->euid; + inode->i_gid = task->egid; + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + security_task_to_inode(task, inode); + put_task_struct(task); + return 1; + } rcu_read_unlock(); put_files_struct(files); - if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; - } else { - inode->i_uid = 0; - inode->i_gid = 0; - } - security_task_to_inode(task, inode); - return 1; } - rcu_read_unlock(); - put_files_struct(files); + put_task_struct(task); } d_drop(dentry); return 0; } -static void pid_base_iput(struct dentry *dentry, struct inode *inode) -{ - struct task_struct *task = proc_task(inode); - spin_lock(&task->proc_lock); - if (task->proc_dentry == dentry) - task->proc_dentry = NULL; - spin_unlock(&task->proc_lock); - iput(inode); -} - static int pid_delete_dentry(struct dentry * dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !pid_alive(proc_task(dentry->d_inode)); + return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; } static struct dentry_operations tid_fd_dentry_operations = @@ -1474,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations = .d_delete = pid_delete_dentry, }; -static struct dentry_operations pid_base_dentry_operations = -{ - .d_revalidate = pid_revalidate, - .d_iput = pid_base_iput, - .d_delete = pid_delete_dentry, -}; - /* Lookups */ static unsigned name_to_int(struct dentry *dentry) @@ -1508,22 +1451,24 @@ out: /* SMP-safe */ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { - struct task_struct *task = proc_task(dir); + struct task_struct *task = get_proc_task(dir); unsigned fd = name_to_int(dentry); + struct dentry *result = ERR_PTR(-ENOENT); struct file * file; struct files_struct * files; struct inode *inode; struct proc_inode *ei; + if (!task) + goto out_no_task; if (fd == ~0U) goto out; - if (!pid_alive(task)) - goto out; inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); if (!inode) goto out; ei = PROC_I(inode); + ei->fd = fd; files = get_files_struct(task); if (!files) goto out_unlock; @@ -1548,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, ei->op.proc_get_link = proc_fd_link; dentry->d_op = &tid_fd_dentry_operations; d_add(dentry, inode); - return NULL; + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, NULL)) + result = NULL; +out: + put_task_struct(task); +out_no_task: + return result; out_unlock2: spin_unlock(&files->file_lock); put_files_struct(files); out_unlock: iput(inode); -out: - return ERR_PTR(-ENOENT); + goto out; } static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); +static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); static struct file_operations proc_fd_operations = { .read = generic_read_dir, @@ -1577,12 +1528,11 @@ static struct file_operations proc_task_operations = { */ static struct inode_operations proc_fd_inode_operations = { .lookup = proc_lookupfd, - .permission = proc_permission, }; static struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, - .permission = proc_task_permission, + .getattr = proc_task_getattr, }; #ifdef CONFIG_SECURITY @@ -1592,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, struct inode * inode = file->f_dentry->d_inode; unsigned long page; ssize_t length; - struct task_struct *task = proc_task(inode); + struct task_struct *task = get_proc_task(inode); + + length = -ESRCH; + if (!task) + goto out_no_task; if (count > PAGE_SIZE) count = PAGE_SIZE; + length = -ENOMEM; if (!(page = __get_free_page(GFP_KERNEL))) - return -ENOMEM; + goto out; length = security_getprocattr(task, (char*)file->f_dentry->d_name.name, @@ -1605,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, if (length >= 0) length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); free_page(page); +out: + put_task_struct(task); +out_no_task: return length; } @@ -1614,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, struct inode * inode = file->f_dentry->d_inode; char *page; ssize_t length; - struct task_struct *task = proc_task(inode); + struct task_struct *task = get_proc_task(inode); + length = -ESRCH; + if (!task) + goto out_no_task; if (count > PAGE_SIZE) count = PAGE_SIZE; - if (*ppos != 0) { - /* No partial writes. */ - return -EINVAL; - } + + /* No partial writes. */ + length = -EINVAL; + if (*ppos != 0) + goto out; + + length = -ENOMEM; page = (char*)__get_free_page(GFP_USER); if (!page) - return -ENOMEM; + goto out; + length = -EFAULT; if (copy_from_user(page, buf, count)) - goto out; + goto out_free; length = security_setprocattr(task, (char*)file->f_dentry->d_name.name, (void*)page, count); -out: +out_free: free_page((unsigned long) page); +out: + put_task_struct(task); +out_no_task: return length; } @@ -1648,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations; static struct inode_operations proc_tgid_attr_inode_operations; #endif -static int get_tid_list(int index, unsigned int *tids, struct inode *dir); - /* SMP-safe */ static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, struct pid_entry *ents) { struct inode *inode; - int error; - struct task_struct *task = proc_task(dir); + struct dentry *error; + struct task_struct *task = get_proc_task(dir); struct pid_entry *p; struct proc_inode *ei; - error = -ENOENT; + error = ERR_PTR(-ENOENT); inode = NULL; - if (!pid_alive(task)) - goto out; + if (!task) + goto out_no_task; for (p = ents; p->name; p++) { if (p->len != dentry->d_name.len) @@ -1676,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, if (!p->name) goto out; - error = -EINVAL; + error = ERR_PTR(-EINVAL); inode = proc_pid_make_inode(dir->i_sb, task, p->type); if (!inode) goto out; @@ -1689,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, */ switch(p->type) { case PROC_TGID_TASK: - inode->i_nlink = 2 + get_tid_list(2, NULL, dir); + inode->i_nlink = 2; inode->i_op = &proc_task_inode_operations; inode->i_fop = &proc_task_operations; break; @@ -1759,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir, #endif case PROC_TID_MEM: case PROC_TGID_MEM: - inode->i_op = &proc_mem_inode_operations; inode->i_fop = &proc_mem_operations; break; #ifdef CONFIG_SECCOMP @@ -1801,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_ATTR_EXEC: case PROC_TID_ATTR_FSCREATE: case PROC_TGID_ATTR_FSCREATE: + case PROC_TID_ATTR_KEYCREATE: + case PROC_TGID_ATTR_KEYCREATE: + case PROC_TID_ATTR_SOCKCREATE: + case PROC_TGID_ATTR_SOCKCREATE: inode->i_fop = &proc_pid_attr_operations; break; #endif @@ -1842,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir, default: printk("procfs: impossible type (%d)",p->type); iput(inode); - return ERR_PTR(-EINVAL); + error = ERR_PTR(-EINVAL); + goto out; } dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); - return NULL; - + /* Close the race of the process dying before we return the dentry */ + if (pid_revalidate(dentry, NULL)) + error = NULL; out: - return ERR_PTR(error); + put_task_struct(task); +out_no_task: + return error; } static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ @@ -1872,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = { static struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, + .getattr = pid_getattr, }; static struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, + .getattr = pid_getattr, }; #ifdef CONFIG_SECURITY @@ -1917,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, static struct inode_operations proc_tgid_attr_inode_operations = { .lookup = proc_tgid_attr_lookup, + .getattr = pid_getattr, }; static struct inode_operations proc_tid_attr_inode_operations = { .lookup = proc_tid_attr_lookup, + .getattr = pid_getattr, }; #endif @@ -1930,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = { static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - char tmp[30]; + char tmp[PROC_NUMBUF]; sprintf(tmp, "%d", current->tgid); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { - char tmp[30]; + char tmp[PROC_NUMBUF]; sprintf(tmp, "%d", current->tgid); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -1948,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = { }; /** - * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. - * @p: task that should be flushed. + * proc_flush_task - Remove dcache entries for @task from the /proc dcache. + * + * @task: task that should be flushed. + * + * Looks in the dcache for + * /proc/@pid + * /proc/@tgid/task/@pid + * if either directory is present flushes it and all of it'ts children + * from the dcache. * - * Drops the /proc/@pid dcache entry from the hash chains. + * It is safe and reasonable to cache /proc entries for a task until + * that task exits. After that they just clog up the dcache with + * useless entries, possibly causing useful dcache entries to be + * flushed instead. This routine is proved to flush those useless + * dcache entries at process exit time. * - * Dropping /proc/@pid entries and detach_pid must be synchroneous, - * otherwise e.g. /proc/@pid/exe might point to the wrong executable, - * if the pid value is immediately reused. This is enforced by - * - caller must acquire spin_lock(p->proc_lock) - * - must be called before detach_pid() - * - proc_pid_lookup acquires proc_lock, and checks that - * the target is not dead by looking at the attach count - * of PIDTYPE_PID. + * NOTE: This routine is just an optimization so it does not guarantee + * that no dcache entries will exist at process exit time it + * just makes it very unlikely that any will persist. */ - -struct dentry *proc_pid_unhash(struct task_struct *p) +void proc_flush_task(struct task_struct *task) { - struct dentry *proc_dentry; + struct dentry *dentry, *leader, *dir; + char buf[PROC_NUMBUF]; + struct qstr name; + + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); + if (dentry) { + shrink_dcache_parent(dentry); + d_drop(dentry); + dput(dentry); + } - proc_dentry = p->proc_dentry; - if (proc_dentry != NULL) { + if (thread_group_leader(task)) + goto out; - spin_lock(&dcache_lock); - spin_lock(&proc_dentry->d_lock); - if (!d_unhashed(proc_dentry)) { - dget_locked(proc_dentry); - __d_drop(proc_dentry); - spin_unlock(&proc_dentry->d_lock); - } else { - spin_unlock(&proc_dentry->d_lock); - proc_dentry = NULL; - } - spin_unlock(&dcache_lock); - } - return proc_dentry; -} + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); + leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); + if (!leader) + goto out; -/** - * proc_pid_flush - recover memory used by stale /proc/@pid/x entries - * @proc_dentry: directoy to prune. - * - * Shrink the /proc directory that was used by the just killed thread. - */ - -void proc_pid_flush(struct dentry *proc_dentry) -{ - might_sleep(); - if(proc_dentry != NULL) { - shrink_dcache_parent(proc_dentry); - dput(proc_dentry); + name.name = "task"; + name.len = strlen(name.name); + dir = d_hash_and_lookup(leader, &name); + if (!dir) + goto out_put_leader; + + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + dentry = d_hash_and_lookup(dir, &name); + if (dentry) { + shrink_dcache_parent(dentry); + d_drop(dentry); + dput(dentry); } + + dput(dir); +out_put_leader: + dput(leader); +out: + return; } /* SMP-safe */ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { + struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; struct inode *inode; struct proc_inode *ei; unsigned tgid; - int died; if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { inode = new_inode(dir->i_sb); @@ -2029,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (tgid == ~0U) goto out; - read_lock(&tasklist_lock); + rcu_read_lock(); task = find_task_by_pid(tgid); if (task) get_task_struct(task); - read_unlock(&tasklist_lock); + rcu_read_unlock(); if (!task) goto out; inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + if (!inode) + goto out_put_task; - - if (!inode) { - put_task_struct(task); - goto out; - } inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; @@ -2054,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct inode->i_nlink = 4; #endif - dentry->d_op = &pid_base_dentry_operations; + dentry->d_op = &pid_dentry_operations; - died = 0; d_add(dentry, inode); - spin_lock(&task->proc_lock); - task->proc_dentry = dentry; - if (!pid_alive(task)) { - dentry = proc_pid_unhash(task); - died = 1; - } - spin_unlock(&task->proc_lock); + /* Close the race of the process dying before we return the dentry */ + if (pid_revalidate(dentry, NULL)) + result = NULL; +out_put_task: put_task_struct(task); - if (died) { - proc_pid_flush(dentry); - goto out; - } - return NULL; out: - return ERR_PTR(-ENOENT); + return result; } /* SMP-safe */ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { + struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; - struct task_struct *leader = proc_task(dir); + struct task_struct *leader = get_proc_task(dir); struct inode *inode; unsigned tid; + if (!leader) + goto out_no_task; + tid = name_to_int(dentry); if (tid == ~0U) goto out; - read_lock(&tasklist_lock); + rcu_read_lock(); task = find_task_by_pid(tid); if (task) get_task_struct(task); - read_unlock(&tasklist_lock); + rcu_read_unlock(); if (!task) goto out; if (leader->tgid != task->tgid) @@ -2113,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry inode->i_nlink = 3; #endif - dentry->d_op = &pid_base_dentry_operations; + dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ + if (pid_revalidate(dentry, NULL)) + result = NULL; - put_task_struct(task); - return NULL; out_drop_task: put_task_struct(task); out: - return ERR_PTR(-ENOENT); + put_task_struct(leader); +out_no_task: + return result; } -#define PROC_NUMBUF 10 -#define PROC_MAXPIDS 20 - /* - * Get a few tgid's to return for filldir - we need to hold the - * tasklist lock while doing this, and we must release it before - * we actually do the filldir itself, so we use a temp buffer.. + * Find the first tgid to return to user space. + * + * Usually this is just whatever follows &init_task, but if the users + * buffer was too small to hold the full list or there was a seek into + * the middle of the directory we have more work to do. + * + * In the case of a short read we start with find_task_by_pid. + * + * In the case of a seek we start with &init_task and walk nr + * threads past it. */ -static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) -{ - struct task_struct *p; - int nr_tgids = 0; - - index--; - read_lock(&tasklist_lock); - p = NULL; - if (version) { - p = find_task_by_pid(version); - if (p && !thread_group_leader(p)) - p = NULL; +static struct task_struct *first_tgid(int tgid, unsigned int nr) +{ + struct task_struct *pos; + rcu_read_lock(); + if (tgid && nr) { + pos = find_task_by_pid(tgid); + if (pos && thread_group_leader(pos)) + goto found; } + /* If nr exceeds the number of processes get out quickly */ + pos = NULL; + if (nr && nr >= nr_processes()) + goto done; - if (p) - index = 0; - else - p = next_task(&init_task); - - for ( ; p != &init_task; p = next_task(p)) { - int tgid = p->pid; - if (!pid_alive(p)) - continue; - if (--index >= 0) - continue; - tgids[nr_tgids] = tgid; - nr_tgids++; - if (nr_tgids >= PROC_MAXPIDS) - break; + /* If we haven't found our starting place yet start with + * the init_task and walk nr tasks forward. + */ + for (pos = next_task(&init_task); nr > 0; --nr) { + pos = next_task(pos); + if (pos == &init_task) { + pos = NULL; + goto done; + } } - read_unlock(&tasklist_lock); - return nr_tgids; +found: + get_task_struct(pos); +done: + rcu_read_unlock(); + return pos; } /* - * Get a few tid's to return for filldir - we need to hold the - * tasklist lock while doing this, and we must release it before - * we actually do the filldir itself, so we use a temp buffer.. + * Find the next task in the task list. + * Return NULL if we loop or there is any error. + * + * The reference to the input task_struct is released. */ -static int get_tid_list(int index, unsigned int *tids, struct inode *dir) -{ - struct task_struct *leader_task = proc_task(dir); - struct task_struct *task = leader_task; - int nr_tids = 0; - - index -= 2; - read_lock(&tasklist_lock); - /* - * The starting point task (leader_task) might be an already - * unlinked task, which cannot be used to access the task-list - * via next_thread(). - */ - if (pid_alive(task)) do { - int tid = task->pid; - - if (--index >= 0) - continue; - if (tids != NULL) - tids[nr_tids] = tid; - nr_tids++; - if (nr_tids >= PROC_MAXPIDS) - break; - } while ((task = next_thread(task)) != leader_task); - read_unlock(&tasklist_lock); - return nr_tids; +static struct task_struct *next_tgid(struct task_struct *start) +{ + struct task_struct *pos; + rcu_read_lock(); + pos = start; + if (pid_alive(start)) + pos = next_task(start); + if (pid_alive(pos) && (pos != &init_task)) { + get_task_struct(pos); + goto done; + } + pos = NULL; +done: + rcu_read_unlock(); + put_task_struct(start); + return pos; } /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int tgid_array[PROC_MAXPIDS]; char buf[PROC_NUMBUF]; unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; - unsigned int nr_tgids, i; - int next_tgid; + struct task_struct *task; + int tgid; if (!nr) { ino_t ino = fake_ino(0,PROC_TGID_INO); @@ -2216,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) filp->f_pos++; nr++; } + nr -= 1; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - next_tgid = filp->f_version; + tgid = filp->f_version; filp->f_version = 0; - for (;;) { - nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); - if (!nr_tgids) { - /* no more entries ! */ + for (task = first_tgid(tgid, nr); + task; + task = next_tgid(task), filp->f_pos++) { + int len; + ino_t ino; + tgid = task->pid; + len = snprintf(buf, sizeof(buf), "%d", tgid); + ino = fake_ino(tgid, PROC_TGID_INO); + if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { + /* returning this tgid failed, save it as the first + * pid for the next readir call */ + filp->f_version = tgid; + put_task_struct(task); break; } - next_tgid = 0; + } + return 0; +} - /* do not use the last found pid, reserve it for next_tgid */ - if (nr_tgids == PROC_MAXPIDS) { - nr_tgids--; - next_tgid = tgid_array[nr_tgids]; - } +/* + * Find the first tid of a thread group to return to user space. + * + * Usually this is just the thread group leader, but if the users + * buffer was too small or there was a seek into the middle of the + * directory we have more work todo. + * + * In the case of a short read we start with find_task_by_pid. + * + * In the case of a seek we start with the leader and walk nr + * threads past it. + */ +static struct task_struct *first_tid(struct task_struct *leader, + int tid, int nr) +{ + struct task_struct *pos; - for (i=0;i<nr_tgids;i++) { - int tgid = tgid_array[i]; - ino_t ino = fake_ino(tgid,PROC_TGID_INO); - unsigned long j = PROC_NUMBUF; + rcu_read_lock(); + /* Attempt to start with the pid of a thread */ + if (tid && (nr > 0)) { + pos = find_task_by_pid(tid); + if (pos && (pos->group_leader == leader)) + goto found; + } - do - buf[--j] = '0' + (tgid % 10); - while ((tgid /= 10) != 0); + /* If nr exceeds the number of threads there is nothing todo */ + pos = NULL; + if (nr && nr >= get_nr_threads(leader)) + goto out; - if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { - /* returning this tgid failed, save it as the first - * pid for the next readir call */ - filp->f_version = tgid_array[i]; - goto out; - } - filp->f_pos++; - nr++; + /* If we haven't found our starting place yet start + * with the leader and walk nr threads forward. + */ + for (pos = leader; nr > 0; --nr) { + pos = next_thread(pos); + if (pos == leader) { + pos = NULL; + goto out; } } +found: + get_task_struct(pos); out: - return 0; + rcu_read_unlock(); + return pos; +} + +/* + * Find the next thread in the thread list. + * Return NULL if there is an error or no next thread. + * + * The reference to the input task_struct is released. + */ +static struct task_struct *next_tid(struct task_struct *start) +{ + struct task_struct *pos = NULL; + rcu_read_lock(); + if (pid_alive(start)) { + pos = next_thread(start); + if (thread_group_leader(pos)) + pos = NULL; + else + get_task_struct(pos); + } + rcu_read_unlock(); + put_task_struct(start); + return pos; } /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int tid_array[PROC_MAXPIDS]; char buf[PROC_NUMBUF]; - unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *leader = get_proc_task(inode); + struct task_struct *task; int retval = -ENOENT; ino_t ino; + int tid; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) - goto out; + if (!leader) + goto out_no_task; retval = 0; switch (pos) { @@ -2290,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi /* fall through */ } - nr_tids = get_tid_list(pos, tid_array, inode); - inode->i_nlink = pos + nr_tids; - - for (i = 0; i < nr_tids; i++) { - unsigned long j = PROC_NUMBUF; - int tid = tid_array[i]; - - ino = fake_ino(tid,PROC_TID_INO); - - do - buf[--j] = '0' + (tid % 10); - while ((tid /= 10) != 0); - - if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) + /* f_version caches the tgid value that the last readdir call couldn't + * return. lseek aka telldir automagically resets f_version to 0. + */ + tid = filp->f_version; + filp->f_version = 0; + for (task = first_tid(leader, tid, pos - 2); + task; + task = next_tid(task), pos++) { + int len; + tid = task->pid; + len = snprintf(buf, sizeof(buf), "%d", tid); + ino = fake_ino(tid, PROC_TID_INO); + if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { + /* returning this tgid failed, save it as the first + * pid for the next readir call */ + filp->f_version = tid; + put_task_struct(task); break; - pos++; + } } out: filp->f_pos = pos; + put_task_struct(leader); +out_no_task: return retval; } + +static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *p = get_proc_task(inode); + generic_fillattr(inode, stat); + + if (p) { + rcu_read_lock(); + stat->nlink += get_nr_threads(p); + rcu_read_unlock(); + put_task_struct(p); + } + + return 0; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 722b9c46311..6dcef089e18 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de) static void proc_delete_inode(struct inode *inode) { struct proc_dir_entry *de; - struct task_struct *tsk; truncate_inode_pages(&inode->i_data, 0); - /* Let go of any associated process */ - tsk = PROC_I(inode)->task; - if (tsk) - put_task_struct(tsk); + /* Stop tracking associated processes */ + put_pid(PROC_I(inode)->pid); /* Let go of any associated proc directory entry */ de = PROC_I(inode)->pde; @@ -94,8 +91,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; - ei->task = NULL; - ei->type = 0; + ei->pid = NULL; + ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; inode = &ei->vfs_inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 0502f17b860..146a434ba94 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -37,16 +37,30 @@ extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +extern struct file_operations proc_maps_operations; +extern struct file_operations proc_numa_maps_operations; +extern struct file_operations proc_smaps_operations; + +extern struct file_operations proc_maps_operations; +extern struct file_operations proc_numa_maps_operations; +extern struct file_operations proc_smaps_operations; + + void free_proc_entry(struct proc_dir_entry *de); int proc_init_inodecache(void); -static inline struct task_struct *proc_task(struct inode *inode) +static inline struct pid *proc_pid(struct inode *inode) +{ + return PROC_I(inode)->pid; +} + +static inline struct task_struct *get_proc_task(struct inode *inode) { - return PROC_I(inode)->task; + return get_pid_task(proc_pid(inode), PIDTYPE_PID); } -static inline int proc_type(struct inode *inode) +static inline int proc_fd(struct inode *inode) { - return PROC_I(inode)->type; + return PROC_I(inode)->fd; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 91b7c15ab37..0137ec4c136 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * { struct vm_area_struct * vma; int result = -ENOENT; - struct task_struct *task = proc_task(inode); - struct mm_struct * mm = get_task_mm(task); + struct task_struct *task = get_proc_task(inode); + struct mm_struct * mm = NULL; + if (task) { + mm = get_task_mm(task); + put_task_struct(task); + } if (!mm) goto out; down_read(&mm->mmap_sem); @@ -120,7 +124,8 @@ struct mem_size_stats static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { - struct task_struct *task = m->private; + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; struct vm_area_struct *vma = v; struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; @@ -295,12 +300,16 @@ static int show_smap(struct seq_file *m, void *v) static void *m_start(struct seq_file *m, loff_t *pos) { - struct task_struct *task = m->private; + struct proc_maps_private *priv = m->private; unsigned long last_addr = m->version; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma; + struct vm_area_struct *vma, *tail_vma = NULL; loff_t l = *pos; + /* Clear the per syscall fields in priv */ + priv->task = NULL; + priv->tail_vma = NULL; + /* * We remember last_addr rather than next_addr to hit with * mmap_cache most of the time. We have zero last_addr at @@ -311,11 +320,15 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (last_addr == -1UL) return NULL; - mm = get_task_mm(task); + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = get_task_mm(priv->task); if (!mm) return NULL; - tail_vma = get_gate_vma(task); + priv->tail_vma = tail_vma = get_gate_vma(priv->task); down_read(&mm->mmap_sem); /* Start with last addr hint */ @@ -350,11 +363,9 @@ out: return tail_vma; } -static void m_stop(struct seq_file *m, void *v) +static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { - struct task_struct *task = m->private; - struct vm_area_struct *vma = v; - if (vma && vma != get_gate_vma(task)) { + if (vma && vma != priv->tail_vma) { struct mm_struct *mm = vma->vm_mm; up_read(&mm->mmap_sem); mmput(mm); @@ -363,38 +374,103 @@ static void m_stop(struct seq_file *m, void *v) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct task_struct *task = m->private; + struct proc_maps_private *priv = m->private; struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = get_gate_vma(task); + struct vm_area_struct *tail_vma = priv->tail_vma; (*pos)++; if (vma && (vma != tail_vma) && vma->vm_next) return vma->vm_next; - m_stop(m, v); + vma_stop(priv, vma); return (vma != tail_vma)? tail_vma: NULL; } -struct seq_operations proc_pid_maps_op = { +static void m_stop(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + + vma_stop(priv, vma); + if (priv->task) + put_task_struct(priv->task); +} + +static struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_map }; -struct seq_operations proc_pid_smaps_op = { +static struct seq_operations proc_pid_smaps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_smap }; +static int do_maps_open(struct inode *inode, struct file *file, + struct seq_operations *ops) +{ + struct proc_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; +} + +static int maps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_maps_op); +} + +struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); -struct seq_operations proc_pid_numa_maps_op = { +static struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_numa_map }; + +static int numa_maps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_numa_maps_op); +} + +struct file_operations proc_numa_maps_operations = { + .open = numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; #endif + +static int smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_smaps_op); +} + +struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8f68827ed10..af69f28277b 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -156,9 +156,28 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos) { return NULL; } -struct seq_operations proc_pid_maps_op = { +static struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = show_map }; + +static int maps_open(struct inode *inode, struct file *file) +{ + int ret; + ret = seq_open(file, &proc_pid_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = NULL; + } + return ret; +} + +struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index cf6e1cf4035..752cea12e30 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t return res; } -static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf, - size_t count, loff_t pos) -{ - return generic_file_aio_write(iocb, buf, count, pos); -} - const struct file_operations reiserfs_file_operations = { .read = generic_file_read, .write = reiserfs_file_write, @@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_file_operations = { .fsync = reiserfs_sync_file, .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, - .aio_write = reiserfs_aio_write, + .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1b73529b809..49d1a53dbef 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -834,8 +834,7 @@ static int write_ordered_buffers(spinlock_t * lock, get_bh(bh); if (test_set_buffer_locked(bh)) { if (!buffer_dirty(bh)) { - list_del_init(&jh->list); - list_add(&jh->list, &tmp); + list_move(&jh->list, &tmp); goto loop_next; } spin_unlock(lock); @@ -855,8 +854,7 @@ static int write_ordered_buffers(spinlock_t * lock, ret = -EIO; } if (buffer_dirty(bh)) { - list_del_init(&jh->list); - list_add(&jh->list, &tmp); + list_move(&jh->list, &tmp); add_to_chunk(&chunk, bh, lock, write_ordered_chunk); } else { reiserfs_free_jh(bh); diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index c71dd2760d3..c8e96195b96 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -400,8 +400,7 @@ static int smb_request_send_req(struct smb_request *req) if (!(req->rq_flags & SMB_REQ_TRANSMITTED)) goto out; - list_del_init(&req->rq_queue); - list_add_tail(&req->rq_queue, &server->recvq); + list_move_tail(&req->rq_queue, &server->recvq); result = 1; out: return result; @@ -435,8 +434,7 @@ int smb_request_send_server(struct smb_sb_info *server) result = smb_request_send_req(req); if (result < 0) { server->conn_error = result; - list_del_init(&req->rq_queue); - list_add(&req->rq_queue, &server->xmitq); + list_move(&req->rq_queue, &server->xmitq); result = -EIO; goto out; } diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 3f71384020c..24577e2c489 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -193,8 +193,7 @@ int smbiod_retry(struct smb_sb_info *server) if (req->rq_flags & SMB_REQ_RETRY) { /* must move the request to the xmitq */ VERBOSE("retrying request %p on recvq\n", req); - list_del(&req->rq_queue); - list_add(&req->rq_queue, &server->xmitq); + list_move(&req->rq_queue, &server->xmitq); continue; } #endif diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 610b5bdbe75..61c42430cba 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -430,10 +430,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) i++; /* fallthrough */ default: - if (filp->f_pos == 2) { - list_del(q); - list_add(q, &parent_sd->s_children); - } + if (filp->f_pos == 2) + list_move(q, &parent_sd->s_children); + for (p=q->next; p!= &parent_sd->s_children; p=p->next) { struct sysfs_dirent *next; const char * name; @@ -455,8 +454,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) dt_type(next)) < 0) return 0; - list_del(q); - list_add(q, p); + list_move(q, p); p = q; filp->f_pos++; } |