summaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c163
-rw-r--r--fs/proc/base.c183
-rw-r--r--fs/proc/generic.c74
-rw-r--r--fs/proc/inode.c61
-rw-r--r--fs/proc/internal.h25
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/nommu.c4
-rw-r--r--fs/proc/proc_misc.c172
-rw-r--r--fs/proc/proc_net.c7
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/proc_tty.c5
-rw-r--r--fs/proc/root.c1
-rw-r--r--fs/proc/task_mmu.c692
-rw-r--r--fs/proc/task_nommu.c13
-rw-r--r--fs/proc/vmcore.c1
15 files changed, 893 insertions, 517 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index b380313092b..07d6c4853fe 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -77,6 +77,7 @@
#include <linux/cpuset.h>
#include <linux/rcupdate.h>
#include <linux/delayacct.h>
+#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
#include <asm/pgtable.h>
@@ -88,18 +89,21 @@
do { memcpy(buffer, string, strlen(string)); \
buffer += strlen(string); } while (0)
-static inline char *task_name(struct task_struct *p, char *buf)
+static inline void task_name(struct seq_file *m, struct task_struct *p)
{
int i;
+ char *buf, *end;
char *name;
char tcomm[sizeof(p->comm)];
get_task_comm(tcomm, p);
- ADDBUF(buf, "Name:\t");
+ seq_printf(m, "Name:\t");
+ end = m->buf + m->size;
+ buf = m->buf + m->count;
name = tcomm;
i = sizeof(tcomm);
- do {
+ while (i && (buf < end)) {
unsigned char c = *name;
name++;
i--;
@@ -107,20 +111,21 @@ static inline char *task_name(struct task_struct *p, char *buf)
if (!c)
break;
if (c == '\\') {
- buf[1] = c;
- buf += 2;
+ buf++;
+ if (buf < end)
+ *buf++ = c;
continue;
}
if (c == '\n') {
- buf[0] = '\\';
- buf[1] = 'n';
- buf += 2;
+ *buf++ = '\\';
+ if (buf < end)
+ *buf++ = 'n';
continue;
}
buf++;
- } while (i);
- *buf = '\n';
- return buf+1;
+ }
+ m->count = buf - m->buf;
+ seq_printf(m, "\n");
}
/*
@@ -151,21 +156,20 @@ static inline const char *get_task_state(struct task_struct *tsk)
return *p;
}
-static inline char *task_state(struct task_struct *p, char *buffer)
+static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *p)
{
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
- struct pid_namespace *ns;
pid_t ppid, tpid;
- ns = current->nsproxy->pid_ns;
rcu_read_lock();
ppid = pid_alive(p) ?
task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
tpid = pid_alive(p) && p->ptrace ?
task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
- buffer += sprintf(buffer,
+ seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
"Pid:\t%d\n"
@@ -175,7 +179,7 @@ static inline char *task_state(struct task_struct *p, char *buffer)
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
task_tgid_nr_ns(p, ns),
- task_pid_nr_ns(p, ns),
+ pid_nr_ns(pid, ns),
ppid, tpid,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
@@ -183,7 +187,7 @@ static inline char *task_state(struct task_struct *p, char *buffer)
task_lock(p);
if (p->files)
fdt = files_fdtable(p->files);
- buffer += sprintf(buffer,
+ seq_printf(m,
"FDSize:\t%d\n"
"Groups:\t",
fdt ? fdt->max_fds : 0);
@@ -194,20 +198,18 @@ static inline char *task_state(struct task_struct *p, char *buffer)
task_unlock(p);
for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
- buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g));
+ seq_printf(m, "%d ", GROUP_AT(group_info, g));
put_group_info(group_info);
- buffer += sprintf(buffer, "\n");
- return buffer;
+ seq_printf(m, "\n");
}
-static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
+static void render_sigset_t(struct seq_file *m, const char *header,
+ sigset_t *set)
{
- int i, len;
+ int i;
- len = strlen(header);
- memcpy(buffer, header, len);
- buffer += len;
+ seq_printf(m, "%s", header);
i = _NSIG;
do {
@@ -218,12 +220,10 @@ static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
if (sigismember(set, i+2)) x |= 2;
if (sigismember(set, i+3)) x |= 4;
if (sigismember(set, i+4)) x |= 8;
- *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
+ seq_printf(m, "%x", x);
} while (i >= 4);
- *buffer++ = '\n';
- *buffer = 0;
- return buffer;
+ seq_printf(m, "\n");
}
static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -241,7 +241,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
}
}
-static inline char *task_sig(struct task_struct *p, char *buffer)
+static inline void task_sig(struct seq_file *m, struct task_struct *p)
{
unsigned long flags;
sigset_t pending, shpending, blocked, ignored, caught;
@@ -268,58 +268,66 @@ static inline char *task_sig(struct task_struct *p, char *buffer)
}
rcu_read_unlock();
- buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
- buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
+ seq_printf(m, "Threads:\t%d\n", num_threads);
+ seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
/* render them all */
- buffer = render_sigset_t("SigPnd:\t", &pending, buffer);
- buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer);
- buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
- buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
- buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
+ render_sigset_t(m, "SigPnd:\t", &pending);
+ render_sigset_t(m, "ShdPnd:\t", &shpending);
+ render_sigset_t(m, "SigBlk:\t", &blocked);
+ render_sigset_t(m, "SigIgn:\t", &ignored);
+ render_sigset_t(m, "SigCgt:\t", &caught);
+}
- return buffer;
+static void render_cap_t(struct seq_file *m, const char *header,
+ kernel_cap_t *a)
+{
+ unsigned __capi;
+
+ seq_printf(m, "%s", header);
+ CAP_FOR_EACH_U32(__capi) {
+ seq_printf(m, "%08x",
+ a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+ }
+ seq_printf(m, "\n");
}
-static inline char *task_cap(struct task_struct *p, char *buffer)
+static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
- return buffer + sprintf(buffer, "CapInh:\t%016x\n"
- "CapPrm:\t%016x\n"
- "CapEff:\t%016x\n",
- cap_t(p->cap_inheritable),
- cap_t(p->cap_permitted),
- cap_t(p->cap_effective));
+ render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
+ render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
+ render_cap_t(m, "CapEff:\t", &p->cap_effective);
}
-static inline char *task_context_switch_counts(struct task_struct *p,
- char *buffer)
+static inline void task_context_switch_counts(struct seq_file *m,
+ struct task_struct *p)
{
- return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
- "nonvoluntary_ctxt_switches:\t%lu\n",
- p->nvcsw,
- p->nivcsw);
+ seq_printf(m, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
}
-int proc_pid_status(struct task_struct *task, char *buffer)
+int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- char *orig = buffer;
struct mm_struct *mm = get_task_mm(task);
- buffer = task_name(task, buffer);
- buffer = task_state(task, buffer);
+ task_name(m, task);
+ task_state(m, ns, pid, task);
if (mm) {
- buffer = task_mem(mm, buffer);
+ task_mem(m, mm);
mmput(mm);
}
- buffer = task_sig(task, buffer);
- buffer = task_cap(task, buffer);
- buffer = cpuset_task_status_allowed(task, buffer);
+ task_sig(m, task);
+ task_cap(m, task);
+ cpuset_task_status_allowed(m, task);
#if defined(CONFIG_S390)
- buffer = task_show_regs(task, buffer);
+ task_show_regs(m, task);
#endif
- buffer = task_context_switch_counts(task, buffer);
- return buffer - orig;
+ task_context_switch_counts(m, task);
+ return 0;
}
/*
@@ -381,14 +389,14 @@ static cputime_t task_gtime(struct task_struct *p)
return p->gtime;
}
-static int do_task_stat(struct task_struct *task, char *buffer, int whole)
+static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
long priority, nice;
int tty_pgrp = -1, tty_nr = 0;
sigset_t sigign, sigcatch;
char state;
- int res;
pid_t ppid = 0, pgid = -1, sid = -1;
int num_threads = 0;
struct mm_struct *mm;
@@ -400,9 +408,6 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
- struct pid_namespace *ns;
-
- ns = current->nsproxy->pid_ns;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -489,10 +494,10 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time);
- res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
+ seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
- task_pid_nr_ns(task, ns),
+ pid_nr_ns(pid, ns),
tcomm,
state,
ppid,
@@ -541,20 +546,23 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
cputime_to_clock_t(cgtime));
if (mm)
mmput(mm);
- return res;
+ return 0;
}
-int proc_tid_stat(struct task_struct *task, char *buffer)
+int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_task_stat(task, buffer, 0);
+ return do_task_stat(m, ns, pid, task, 0);
}
-int proc_tgid_stat(struct task_struct *task, char *buffer)
+int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_task_stat(task, buffer, 1);
+ return do_task_stat(m, ns, pid, task, 1);
}
-int proc_pid_statm(struct task_struct *task, char *buffer)
+int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
struct mm_struct *mm = get_task_mm(task);
@@ -563,7 +571,8 @@ int proc_pid_statm(struct task_struct *task, char *buffer)
size = task_statm(mm, &shared, &text, &data, &resident);
mmput(mm);
}
+ seq_printf(m, "%d %d %d %d %d %d %d\n",
+ size, resident, shared, text, lib, data, 0);
- return sprintf(buffer, "%d %d %d %d %d %d %d\n",
- size, resident, shared, text, lib, data, 0);
+ return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 33537487f5a..88f8edf1825 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -88,10 +88,6 @@
* in /proc for a task before it execs a suid executable.
*/
-
-/* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 13
-
struct pid_entry {
char *name;
int len;
@@ -125,6 +121,10 @@ struct pid_entry {
NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_info_file_operations, \
{ .proc_read = &proc_##OTYPE } )
+#define ONE(NAME, MODE, OTYPE) \
+ NOD(NAME, (S_IFREG|(MODE)), \
+ NULL, &proc_single_file_operations, \
+ { .proc_show = &proc_##OTYPE } )
int maps_protect;
EXPORT_SYMBOL(maps_protect);
@@ -153,7 +153,7 @@ static int get_nr_threads(struct task_struct *tsk)
return count;
}
-static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+static int proc_cwd_link(struct inode *inode, struct path *path)
{
struct task_struct *task = get_proc_task(inode);
struct fs_struct *fs = NULL;
@@ -165,8 +165,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
}
if (fs) {
read_lock(&fs->lock);
- *mnt = mntget(fs->pwdmnt);
- *dentry = dget(fs->pwd);
+ *path = fs->pwd;
+ path_get(&fs->pwd);
read_unlock(&fs->lock);
result = 0;
put_fs_struct(fs);
@@ -174,7 +174,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
return result;
}
-static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+static int proc_root_link(struct inode *inode, struct path *path)
{
struct task_struct *task = get_proc_task(inode);
struct fs_struct *fs = NULL;
@@ -186,8 +186,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
}
if (fs) {
read_lock(&fs->lock);
- *mnt = mntget(fs->rootmnt);
- *dentry = dget(fs->root);
+ *path = fs->root;
+ path_get(&fs->root);
read_unlock(&fs->lock);
result = 0;
put_fs_struct(fs);
@@ -506,7 +506,7 @@ static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
-extern struct seq_operations mounts_op;
+extern const struct seq_operations mounts_op;
struct proc_mounts {
struct seq_file m;
int event;
@@ -585,7 +585,7 @@ static const struct file_operations proc_mounts_operations = {
.poll = mounts_poll,
};
-extern struct seq_operations mountstats_op;
+extern const struct seq_operations mountstats_op;
static int mountstats_open(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &mountstats_op);
@@ -662,6 +662,45 @@ static const struct file_operations proc_info_file_operations = {
.read = proc_info_read,
};
+static int proc_single_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct pid_namespace *ns;
+ struct pid *pid;
+ struct task_struct *task;
+ int ret;
+
+ ns = inode->i_sb->s_fs_info;
+ pid = proc_pid(inode);
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ return -ESRCH;
+
+ ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
+
+ put_task_struct(task);
+ return ret;
+}
+
+static int proc_single_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ ret = single_open(filp, proc_single_show, NULL);
+ if (!ret) {
+ struct seq_file *m = filp->private_data;
+
+ m->private = inode;
+ }
+ return ret;
+}
+
+static const struct file_operations proc_single_file_operations = {
+ .open = proc_single_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int mem_open(struct inode* inode, struct file* file)
{
file->private_data = (void*)((long)current->self_exec_id);
@@ -787,7 +826,7 @@ out_no_task:
}
#endif
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
switch (orig) {
case 0:
@@ -935,42 +974,6 @@ static const struct file_operations proc_oom_adjust_operations = {
.write = oom_adjust_write,
};
-#ifdef CONFIG_MMU
-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task;
- char buffer[PROC_NUMBUF], *end;
- struct mm_struct *mm;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count))
- return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
- return -EINVAL;
- if (*end == '\n')
- end++;
- task = get_proc_task(file->f_path.dentry->d_inode);
- if (!task)
- return -ESRCH;
- mm = get_task_mm(task);
- if (mm) {
- clear_refs_smap(mm);
- mmput(mm);
- }
- put_task_struct(task);
- if (end - buffer == 0)
- return -EIO;
- return end - buffer;
-}
-
-static struct file_operations proc_clear_refs_operations = {
- .write = clear_refs_write,
-};
-#endif
-
#ifdef CONFIG_AUDITSYSCALL
#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -1161,39 +1164,36 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
int error = -EACCES;
/* We don't need a base pointer in the /proc filesystem */
- path_release(nd);
+ path_put(&nd->path);
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+ error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
nd->last_type = LAST_BIND;
out:
return ERR_PTR(error);
}
-static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
- char __user *buffer, int buflen)
+static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
- struct inode * inode;
char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
- char *path;
+ char *pathname;
int len;
if (!tmp)
return -ENOMEM;
- inode = dentry->d_inode;
- path = d_path(dentry, mnt, tmp, PAGE_SIZE);
- len = PTR_ERR(path);
- if (IS_ERR(path))
+ pathname = d_path(path, tmp, PAGE_SIZE);
+ len = PTR_ERR(pathname);
+ if (IS_ERR(pathname))
goto out;
- len = tmp + PAGE_SIZE - 1 - path;
+ len = tmp + PAGE_SIZE - 1 - pathname;
if (len > buflen)
len = buflen;
- if (copy_to_user(buffer, path, len))
+ if (copy_to_user(buffer, pathname, len))
len = -EFAULT;
out:
free_page((unsigned long)tmp);
@@ -1204,20 +1204,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
{
int error = -EACCES;
struct inode *inode = dentry->d_inode;
- struct dentry *de;
- struct vfsmount *mnt = NULL;
+ struct path path;
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
+ error = PROC_I(inode)->op.proc_get_link(inode, &path);
if (error)
goto out;
- error = do_proc_readlink(de, mnt, buffer, buflen);
- dput(de);
- mntput(mnt);
+ error = do_proc_readlink(&path, buffer, buflen);
+ path_put(&path);
out:
return error;
}
@@ -1444,8 +1442,7 @@ out:
#define PROC_FDINFO_MAX 64
-static int proc_fd_info(struct inode *inode, struct dentry **dentry,
- struct vfsmount **mnt, char *info)
+static int proc_fd_info(struct inode *inode, struct path *path, char *info)
{
struct task_struct *task = get_proc_task(inode);
struct files_struct *files = NULL;
@@ -1464,10 +1461,10 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry,
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (file) {
- if (mnt)
- *mnt = mntget(file->f_path.mnt);
- if (dentry)
- *dentry = dget(file->f_path.dentry);
+ if (path) {
+ *path = file->f_path;
+ path_get(&file->f_path);
+ }
if (info)
snprintf(info, PROC_FDINFO_MAX,
"pos:\t%lli\n"
@@ -1484,10 +1481,9 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry,
return -ENOENT;
}
-static int proc_fd_link(struct inode *inode, struct dentry **dentry,
- struct vfsmount **mnt)
+static int proc_fd_link(struct inode *inode, struct path *path)
{
- return proc_fd_info(inode, dentry, mnt, NULL);
+ return proc_fd_info(inode, path, NULL);
}
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1681,7 +1677,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
char tmp[PROC_FDINFO_MAX];
- int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp);
+ int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
if (!err)
err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
return err;
@@ -2098,15 +2094,23 @@ static const struct file_operations proc_coredump_filter_operations = {
static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
int buflen)
{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", task_tgid_vnr(current));
+ if (!tgid)
+ return -ENOENT;
+ sprintf(tmp, "%d", tgid);
return vfs_readlink(dentry,buffer,buflen,tmp);
}
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", task_tgid_vnr(current));
+ if (!tgid)
+ return ERR_PTR(-ENOENT);
+ sprintf(tmp, "%d", task_tgid_nr_ns(current, ns));
return ERR_PTR(vfs_follow_link(nd,tmp));
}
@@ -2271,14 +2275,14 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
REG("environ", S_IRUSR, environ),
INF("auxv", S_IRUSR, pid_auxv),
- INF("status", S_IRUGO, pid_status),
+ ONE("status", S_IRUGO, pid_status),
INF("limits", S_IRUSR, pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
INF("cmdline", S_IRUGO, pid_cmdline),
- INF("stat", S_IRUGO, tgid_stat),
- INF("statm", S_IRUGO, pid_statm),
+ ONE("stat", S_IRUGO, tgid_stat),
+ ONE("statm", S_IRUGO, pid_statm),
REG("maps", S_IRUGO, maps),
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, numa_maps),
@@ -2289,9 +2293,10 @@ static const struct pid_entry tgid_base_stuff[] = {
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
REG("mountstats", S_IRUSR, mountstats),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pagemap", S_IRUSR, pagemap),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
@@ -2360,7 +2365,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
name.len = snprintf(buf, sizeof(buf), "%d", pid);
dentry = d_hash_and_lookup(mnt->mnt_root, &name);
if (dentry) {
- shrink_dcache_parent(dentry);
+ if (!(current->flags & PF_EXITING))
+ shrink_dcache_parent(dentry);
d_drop(dentry);
dput(dentry);
}
@@ -2600,14 +2606,14 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
REG("environ", S_IRUSR, environ),
INF("auxv", S_IRUSR, pid_auxv),
- INF("status", S_IRUGO, pid_status),
+ ONE("status", S_IRUGO, pid_status),
INF("limits", S_IRUSR, pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, pid_sched),
#endif
INF("cmdline", S_IRUGO, pid_cmdline),
- INF("stat", S_IRUGO, tid_stat),
- INF("statm", S_IRUGO, pid_statm),
+ ONE("stat", S_IRUGO, tid_stat),
+ ONE("statm", S_IRUGO, pid_statm),
REG("maps", S_IRUGO, maps),
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, numa_maps),
@@ -2617,9 +2623,10 @@ static const struct pid_entry tid_base_stuff[] = {
LNK("root", root),
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pagemap", S_IRUSR, pagemap),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6a2fe5187b6..68971e66cd4 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,12 +25,6 @@
#include "internal.h"
-static ssize_t proc_file_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos);
-static ssize_t proc_file_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos);
-static loff_t proc_file_lseek(struct file *, loff_t, int);
-
DEFINE_SPINLOCK(proc_subdir_lock);
static int proc_match(int len, const char *name, struct proc_dir_entry *de)
@@ -40,12 +34,6 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de)
return !memcmp(name, de->name, len);
}
-static const struct file_operations proc_file_operations = {
- .llseek = proc_file_lseek,
- .read = proc_file_read,
- .write = proc_file_write,
-};
-
/* buffer size is one page but our output routines use some slack for overruns */
#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
@@ -233,6 +221,12 @@ proc_file_lseek(struct file *file, loff_t offset, int orig)
return retval;
}
+static const struct file_operations proc_file_operations = {
+ .llseek = proc_file_lseek,
+ .read = proc_file_read,
+ .write = proc_file_write,
+};
+
static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
@@ -406,12 +400,12 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
spin_unlock(&proc_subdir_lock);
error = -EINVAL;
inode = proc_get_inode(dir->i_sb, ino, de);
- spin_lock(&proc_subdir_lock);
- break;
+ goto out_unlock;
}
}
}
spin_unlock(&proc_subdir_lock);
+out_unlock:
unlock_kernel();
if (inode) {
@@ -527,6 +521,7 @@ static const struct inode_operations proc_dir_inode_operations = {
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
unsigned int i;
+ struct proc_dir_entry *tmp;
i = get_inode_number();
if (i == 0)
@@ -550,6 +545,15 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
}
spin_lock(&proc_subdir_lock);
+
+ for (tmp = dir->subdir; tmp; tmp = tmp->next)
+ if (strcmp(tmp->name, dp->name) == 0) {
+ printk(KERN_WARNING "proc_dir_entry '%s' already "
+ "registered\n", dp->name);
+ dump_stack();
+ break;
+ }
+
dp->next = dir->subdir;
dp->parent = dir;
dir->subdir = dp;
@@ -558,7 +562,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
return 0;
}
-static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
+static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
const char *name,
mode_t mode,
nlink_t nlink)
@@ -601,7 +605,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
{
struct proc_dir_entry *ent;
- ent = proc_create(&parent,name,
+ ent = __proc_create(&parent, name,
(S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
if (ent) {
@@ -626,7 +630,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
{
struct proc_dir_entry *ent;
- ent = proc_create(&parent, name, S_IFDIR | mode, 2);
+ ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
if (ent) {
if (proc_register(parent, ent) < 0) {
kfree(ent);
@@ -660,7 +664,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
nlink = 1;
}
- ent = proc_create(&parent,name,mode,nlink);
+ ent = __proc_create(&parent, name, mode, nlink);
if (ent) {
if (proc_register(parent, ent) < 0) {
kfree(ent);
@@ -670,6 +674,38 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
return ent;
}
+struct proc_dir_entry *proc_create(const char *name, mode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops)
+{
+ struct proc_dir_entry *pde;
+ nlink_t nlink;
+
+ if (S_ISDIR(mode)) {
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO | S_IXUGO;
+ nlink = 2;
+ } else {
+ if ((mode & S_IFMT) == 0)
+ mode |= S_IFREG;
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ nlink = 1;
+ }
+
+ pde = __proc_create(&parent, name, mode, nlink);
+ if (!pde)
+ goto out;
+ pde->proc_fops = proc_fops;
+ if (proc_register(parent, pde) < 0)
+ goto out_free;
+ return pde;
+out_free:
+ kfree(pde);
+out:
+ return NULL;
+}
+
void free_proc_entry(struct proc_dir_entry *de)
{
unsigned int ino = de->low_ino;
@@ -679,7 +715,7 @@ void free_proc_entry(struct proc_dir_entry *de)
release_inode_number(ino);
- if (S_ISLNK(de->mode) && de->data)
+ if (S_ISLNK(de->mode))
kfree(de->data);
kfree(de);
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 1a551d92e1d..82b3a1b5a70 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -73,11 +73,6 @@ static void proc_delete_inode(struct inode *inode)
struct vfsmount *proc_mnt;
-static void proc_read_inode(struct inode * inode)
-{
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-}
-
static struct kmem_cache * proc_inode_cachep;
static struct inode *proc_alloc_inode(struct super_block *sb)
@@ -128,7 +123,6 @@ static int proc_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
- .read_inode = proc_read_inode,
.drop_inode = generic_delete_inode,
.delete_inode = proc_delete_inode,
.statfs = simple_statfs,
@@ -401,39 +395,41 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
if (de != NULL && !try_module_get(de->owner))
goto out_mod;
- inode = iget(sb, ino);
+ inode = iget_locked(sb, ino);
if (!inode)
goto out_ino;
-
- PROC_I(inode)->fd = 0;
- PROC_I(inode)->pde = de;
- if (de) {
- if (de->mode) {
- inode->i_mode = de->mode;
- inode->i_uid = de->uid;
- inode->i_gid = de->gid;
- }
- if (de->size)
- inode->i_size = de->size;
- if (de->nlink)
- inode->i_nlink = de->nlink;
- if (de->proc_iops)
- inode->i_op = de->proc_iops;
- if (de->proc_fops) {
- if (S_ISREG(inode->i_mode)) {
+ if (inode->i_state & I_NEW) {
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ PROC_I(inode)->fd = 0;
+ PROC_I(inode)->pde = de;
+ if (de) {
+ if (de->mode) {
+ inode->i_mode = de->mode;
+ inode->i_uid = de->uid;
+ inode->i_gid = de->gid;
+ }
+ if (de->size)
+ inode->i_size = de->size;
+ if (de->nlink)
+ inode->i_nlink = de->nlink;
+ if (de->proc_iops)
+ inode->i_op = de->proc_iops;
+ if (de->proc_fops) {
+ if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_COMPAT
- if (!de->proc_fops->compat_ioctl)
- inode->i_fop =
- &proc_reg_file_ops_no_compat;
- else
+ if (!de->proc_fops->compat_ioctl)
+ inode->i_fop =
+ &proc_reg_file_ops_no_compat;
+ else
#endif
- inode->i_fop = &proc_reg_file_ops;
+ inode->i_fop = &proc_reg_file_ops;
+ } else {
+ inode->i_fop = de->proc_fops;
+ }
}
- else
- inode->i_fop = de->proc_fops;
}
+ unlock_new_inode(inode);
}
-
return inode;
out_ino:
@@ -471,4 +467,3 @@ out_no_root:
de_put(&proc_root);
return -ENOMEM;
}
-MODULE_LICENSE("GPL");
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 05b3e900626..1c81c8f1aee 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -46,21 +46,24 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
extern int maps_protect;
-extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
-extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **);
-extern int proc_tid_stat(struct task_struct *, char *);
-extern int proc_tgid_stat(struct task_struct *, char *);
-extern int proc_pid_status(struct task_struct *, char *);
-extern int proc_pid_statm(struct task_struct *, char *);
+extern void create_seq_entry(char *name, mode_t mode,
+ const struct file_operations *f);
+extern int proc_exe_link(struct inode *, struct path *);
+extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
extern const struct file_operations proc_maps_operations;
extern const struct file_operations proc_numa_maps_operations;
extern const struct file_operations proc_smaps_operations;
-
-extern const struct file_operations proc_maps_operations;
-extern const struct file_operations proc_numa_maps_operations;
-extern const struct file_operations proc_smaps_operations;
-
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1be73082edd..e78c81fcf54 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -12,7 +12,6 @@
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
@@ -325,7 +324,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (m == NULL) {
if (clear_user(buffer, tsz))
return -EFAULT;
- } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
+ } else if (is_vmalloc_addr((void *)start)) {
char * elf_buf;
struct vm_struct *m;
unsigned long curstart = start;
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 22f789de390..941e95114b5 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -67,7 +67,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
if (len < 1)
len = 1;
seq_printf(m, "%*c", len, ' ');
- seq_path(m, file->f_path.mnt, file->f_path.dentry, "");
+ seq_path(m, &file->f_path, "");
}
seq_putc(m, '\n');
@@ -116,7 +116,7 @@ static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
return rb_next((struct rb_node *) v);
}
-static struct seq_operations proc_nommu_vma_list_seqop = {
+static const struct seq_operations proc_nommu_vma_list_seqop = {
.start = nommu_vma_list_start,
.next = nommu_vma_list_next,
.stop = nommu_vma_list_stop,
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3462bfde89f..468805d40e2 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/pagemap.h>
+#include <linux/interrupt.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -46,6 +47,7 @@
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>
#include <linux/pid_namespace.h>
+#include <linux/bootmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -63,7 +65,6 @@
*/
extern int get_hardware_list(char *);
extern int get_stram_list(char *);
-extern int get_filesystem_list(char *);
extern int get_exec_domain_list(char *);
extern int get_dma_list(char *);
@@ -83,10 +84,15 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
{
int a, b, c;
int len;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ a = avenrun[0] + (FIXED_1/200);
+ b = avenrun[1] + (FIXED_1/200);
+ c = avenrun[2] + (FIXED_1/200);
+ } while (read_seqretry(&xtime_lock, seq));
- a = avenrun[0] + (FIXED_1/200);
- b = avenrun[1] + (FIXED_1/200);
- c = avenrun[2] + (FIXED_1/200);
len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
LOAD_INT(a), LOAD_FRAC(a),
LOAD_INT(b), LOAD_FRAC(b),
@@ -216,7 +222,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
#undef K
}
-extern struct seq_operations fragmentation_op;
+extern const struct seq_operations fragmentation_op;
static int fragmentation_open(struct inode *inode, struct file *file)
{
(void)inode;
@@ -230,7 +236,7 @@ static const struct file_operations fragmentation_file_operations = {
.release = seq_release,
};
-extern struct seq_operations pagetypeinfo_op;
+extern const struct seq_operations pagetypeinfo_op;
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &pagetypeinfo_op);
@@ -243,7 +249,7 @@ static const struct file_operations pagetypeinfo_file_ops = {
.release = seq_release,
};
-extern struct seq_operations zoneinfo_op;
+extern const struct seq_operations zoneinfo_op;
static int zoneinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &zoneinfo_op);
@@ -268,7 +274,7 @@ static int version_read_proc(char *page, char **start, off_t off,
return proc_calc_metrics(page, start, off, count, eof, len);
}
-extern struct seq_operations cpuinfo_op;
+extern const struct seq_operations cpuinfo_op;
static int cpuinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &cpuinfo_op);
@@ -321,7 +327,7 @@ static void devinfo_stop(struct seq_file *f, void *v)
/* Nothing to do */
}
-static struct seq_operations devinfo_ops = {
+static const struct seq_operations devinfo_ops = {
.start = devinfo_start,
.next = devinfo_next,
.stop = devinfo_stop,
@@ -340,7 +346,7 @@ static const struct file_operations proc_devinfo_operations = {
.release = seq_release,
};
-extern struct seq_operations vmstat_op;
+extern const struct seq_operations vmstat_op;
static int vmstat_open(struct inode *inode, struct file *file)
{
return seq_open(file, &vmstat_op);
@@ -371,7 +377,7 @@ static int stram_read_proc(char *page, char **start, off_t off,
#endif
#ifdef CONFIG_BLOCK
-extern struct seq_operations partitions_op;
+extern const struct seq_operations partitions_op;
static int partitions_open(struct inode *inode, struct file *file)
{
return seq_open(file, &partitions_op);
@@ -383,7 +389,7 @@ static const struct file_operations proc_partitions_operations = {
.release = seq_release,
};
-extern struct seq_operations diskstats_op;
+extern const struct seq_operations diskstats_op;
static int diskstats_open(struct inode *inode, struct file *file)
{
return seq_open(file, &diskstats_op);
@@ -397,7 +403,7 @@ static const struct file_operations proc_diskstats_operations = {
#endif
#ifdef CONFIG_MODULES
-extern struct seq_operations modules_op;
+extern const struct seq_operations modules_op;
static int modules_open(struct inode *inode, struct file *file)
{
return seq_open(file, &modules_op);
@@ -424,7 +430,7 @@ static const struct file_operations proc_slabinfo_operations = {
};
#ifdef CONFIG_DEBUG_SLAB_LEAK
-extern struct seq_operations slabstats_op;
+extern const struct seq_operations slabstats_op;
static int slabstats_open(struct inode *inode, struct file *file)
{
unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
@@ -598,8 +604,7 @@ static void int_seq_stop(struct seq_file *f, void *v)
}
-extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */
-static struct seq_operations int_seq_ops = {
+static const struct seq_operations int_seq_ops = {
.start = int_seq_start,
.next = int_seq_next,
.stop = int_seq_stop,
@@ -675,6 +680,137 @@ static const struct file_operations proc_sysrq_trigger_operations = {
};
#endif
+#ifdef CONFIG_PROC_PAGE_MONITOR
+#define KPMSIZE sizeof(u64)
+#define KPMMASK (KPMSIZE - 1)
+/* /proc/kpagecount - an array exposing page counts
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page count.
+ */
+static ssize_t kpagecount_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 pcount;
+
+ pfn = src / KPMSIZE;
+ count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EIO;
+
+ while (count > 0) {
+ ppage = NULL;
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ pfn++;
+ if (!ppage)
+ pcount = 0;
+ else
+ pcount = atomic_read(&ppage->_count);
+
+ if (put_user(pcount, out++)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ count -= KPMSIZE;
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static struct file_operations proc_kpagecount_operations = {
+ .llseek = mem_lseek,
+ .read = kpagecount_read,
+};
+
+/* /proc/kpageflags - an array exposing page flags
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page flags.
+ */
+
+/* These macros are used to decouple internal flags from exported ones */
+
+#define KPF_LOCKED 0
+#define KPF_ERROR 1
+#define KPF_REFERENCED 2
+#define KPF_UPTODATE 3
+#define KPF_DIRTY 4
+#define KPF_LRU 5
+#define KPF_ACTIVE 6
+#define KPF_SLAB 7
+#define KPF_WRITEBACK 8
+#define KPF_RECLAIM 9
+#define KPF_BUDDY 10
+
+#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
+
+static ssize_t kpageflags_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 kflags, uflags;
+
+ pfn = src / KPMSIZE;
+ count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EIO;
+
+ while (count > 0) {
+ ppage = NULL;
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ pfn++;
+ if (!ppage)
+ kflags = 0;
+ else
+ kflags = ppage->flags;
+
+ uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) |
+ kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
+ kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
+ kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
+ kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
+ kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
+ kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
+ kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
+ kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
+ kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
+ kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
+
+ if (put_user(uflags, out++)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ count -= KPMSIZE;
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static struct file_operations proc_kpageflags_operations = {
+ .llseek = mem_lseek,
+ .read = kpageflags_read,
+};
+#endif /* CONFIG_PROC_PAGE_MONITOR */
+
struct proc_dir_entry *proc_root_kcore;
void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -755,6 +891,10 @@ void __init proc_misc_init(void)
(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
}
#endif
+#ifdef CONFIG_PROC_PAGE_MONITOR
+ create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations);
+ create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations);
+#endif
#ifdef CONFIG_PROC_VMCORE
proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
if (proc_vmcore)
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4823c9677fa..14e9b5aaf86 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -67,12 +67,7 @@ EXPORT_SYMBOL_GPL(seq_release_net);
struct proc_dir_entry *proc_net_fops_create(struct net *net,
const char *name, mode_t mode, const struct file_operations *fops)
{
- struct proc_dir_entry *res;
-
- res = create_proc_entry(name, mode, net->proc_net);
- if (res)
- res->proc_fops = fops;
- return res;
+ return proc_create(name, mode, net->proc_net, fops);
}
EXPORT_SYMBOL_GPL(proc_net_fops_create);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 4e57fcf8598..614c34b6d1c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -9,7 +9,7 @@
static struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
-static struct inode_operations proc_sys_inode_operations;
+static const struct inode_operations proc_sys_inode_operations;
static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
{
@@ -407,7 +407,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
if (!nd || !depth)
goto out;
- dentry = nd->dentry;
+ dentry = nd->path.dentry;
table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
/* If the entry does not exist deny permission */
@@ -446,7 +446,7 @@ static const struct file_operations proc_sys_file_operations = {
.readdir = proc_sys_readdir,
};
-static struct inode_operations proc_sys_inode_operations = {
+static const struct inode_operations proc_sys_inode_operations = {
.lookup = proc_sys_lookup,
.permission = proc_sys_permission,
.setattr = proc_sys_setattr,
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 22846225acf..49816e00b51 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -15,9 +15,6 @@
#include <linux/seq_file.h>
#include <linux/bitops.h>
-static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-
/*
* The /proc/tty directory inodes...
*/
@@ -120,7 +117,7 @@ static void t_stop(struct seq_file *m, void *v)
mutex_unlock(&tty_mutex);
}
-static struct seq_operations tty_drivers_op = {
+static const struct seq_operations tty_drivers_op = {
.start = t_start,
.next = t_next,
.stop = t_stop,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 81f99e691f9..ef0fb57fc9e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -232,6 +232,7 @@ void pid_ns_release_proc(struct pid_namespace *ns)
EXPORT_SYMBOL(proc_symlink);
EXPORT_SYMBOL(proc_mkdir);
EXPORT_SYMBOL(create_proc_entry);
+EXPORT_SYMBOL(proc_create);
EXPORT_SYMBOL(remove_proc_entry);
EXPORT_SYMBOL(proc_root);
EXPORT_SYMBOL(proc_root_fs);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8043a3eab52..49958cffbd8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,14 +5,18 @@
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/pagemap.h>
+#include <linux/ptrace.h>
#include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/seq_file.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include "internal.h"
-char *task_mem(struct mm_struct *mm, char *buffer)
+void task_mem(struct seq_file *m, struct mm_struct *mm)
{
unsigned long data, text, lib;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
@@ -34,7 +38,7 @@ char *task_mem(struct mm_struct *mm, char *buffer)
data = mm->total_vm - mm->shared_vm - mm->stack_vm;
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
- buffer += sprintf(buffer,
+ seq_printf(m,
"VmPeak:\t%8lu kB\n"
"VmSize:\t%8lu kB\n"
"VmLck:\t%8lu kB\n"
@@ -53,7 +57,6 @@ char *task_mem(struct mm_struct *mm, char *buffer)
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
- return buffer;
}
unsigned long task_vsize(struct mm_struct *mm)
@@ -72,7 +75,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return mm->total_vm;
}
-int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+int proc_exe_link(struct inode *inode, struct path *path)
{
struct vm_area_struct * vma;
int result = -ENOENT;
@@ -95,8 +98,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
}
if (vma) {
- *mnt = mntget(vma->vm_file->f_path.mnt);
- *dentry = dget(vma->vm_file->f_path.dentry);
+ *path = vma->vm_file->f_path;
+ path_get(&vma->vm_file->f_path);
result = 0;
}
@@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len)
seq_printf(m, "%*c", len, ' ');
}
-struct mem_size_stats
+static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
- unsigned long resident;
- unsigned long shared_clean;
- unsigned long shared_dirty;
- unsigned long private_clean;
- unsigned long private_dirty;
- unsigned long referenced;
-};
+ if (vma && vma != priv->tail_vma) {
+ struct mm_struct *mm = vma->vm_mm;
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+}
-struct pmd_walker {
- struct vm_area_struct *vma;
- void *private;
- void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
- unsigned long, void *);
-};
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_maps_private *priv = m->private;
+ unsigned long last_addr = m->version;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma, *tail_vma = NULL;
+ loff_t l = *pos;
+
+ /* Clear the per syscall fields in priv */
+ priv->task = NULL;
+ priv->tail_vma = NULL;
+
+ /*
+ * We remember last_addr rather than next_addr to hit with
+ * mmap_cache most of the time. We have zero last_addr at
+ * the beginning and also after lseek. We will have -1 last_addr
+ * after the end of the vmas.
+ */
+
+ if (last_addr == -1UL)
+ return NULL;
+
+ priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+ if (!priv->task)
+ return NULL;
+
+ mm = mm_for_maps(priv->task);
+ if (!mm)
+ return NULL;
+
+ tail_vma = get_gate_vma(priv->task);
+ priv->tail_vma = tail_vma;
+
+ /* Start with last addr hint */
+ vma = find_vma(mm, last_addr);
+ if (last_addr && vma) {
+ vma = vma->vm_next;
+ goto out;
+ }
+
+ /*
+ * Check the vma index is within the range and do
+ * sequential scan until m_index.
+ */
+ vma = NULL;
+ if ((unsigned long)l < mm->map_count) {
+ vma = mm->mmap;
+ while (l-- && vma)
+ vma = vma->vm_next;
+ goto out;
+ }
+
+ if (l != mm->map_count)
+ tail_vma = NULL; /* After gate vma */
+
+out:
+ if (vma)
+ return vma;
+
+ /* End of vmas has been reached */
+ m->version = (tail_vma != NULL)? 0: -1UL;
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return tail_vma;
+}
+
+static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma = v;
+ struct vm_area_struct *tail_vma = priv->tail_vma;
+
+ (*pos)++;
+ if (vma && (vma != tail_vma) && vma->vm_next)
+ return vma->vm_next;
+ vma_stop(priv, vma);
+ return (vma != tail_vma)? tail_vma: NULL;
+}
-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
+static void m_stop(struct seq_file *m, void *v)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma = v;
+
+ vma_stop(priv, vma);
+ if (priv->task)
+ put_task_struct(priv->task);
+}
+
+static int do_maps_open(struct inode *inode, struct file *file,
+ const struct seq_operations *ops)
+{
+ struct proc_maps_private *priv;
+ int ret = -ENOMEM;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv) {
+ priv->pid = proc_pid(inode);
+ ret = seq_open(file, ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = priv;
+ } else {
+ kfree(priv);
+ }
+ }
+ return ret;
+}
+
+static int show_map(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
@@ -168,7 +271,7 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
*/
if (file) {
pad_len_spaces(m, len);
- seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
+ seq_path(m, &file->f_path, "\n");
} else {
const char *name = arch_vma_name(vma);
if (!name) {
@@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
}
seq_putc(m, '\n');
- if (mss)
- seq_printf(m,
- "Size: %8lu kB\n"
- "Rss: %8lu kB\n"
- "Shared_Clean: %8lu kB\n"
- "Shared_Dirty: %8lu kB\n"
- "Private_Clean: %8lu kB\n"
- "Private_Dirty: %8lu kB\n"
- "Referenced: %8lu kB\n",
- (vma->vm_end - vma->vm_start) >> 10,
- mss->resident >> 10,
- mss->shared_clean >> 10,
- mss->shared_dirty >> 10,
- mss->private_clean >> 10,
- mss->private_dirty >> 10,
- mss->referenced >> 10);
-
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
return 0;
}
-static int show_map(struct seq_file *m, void *v)
+static const struct seq_operations proc_pid_maps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_map
+};
+
+static int maps_open(struct inode *inode, struct file *file)
{
- return show_map_internal(m, v, NULL);
+ return do_maps_open(inode, file, &proc_pid_maps_op);
}
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- void *private)
+const struct file_operations proc_maps_operations = {
+ .open = maps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Proportional Set Size(PSS): my share of RSS.
+ *
+ * PSS of a process is the count of pages it has in memory, where each
+ * page is divided by the number of processes sharing it. So if a
+ * process has 1000 pages all to itself, and 1000 shared with one other
+ * process, its PSS will be 1500.
+ *
+ * To keep (accumulated) division errors low, we adopt a 64bit
+ * fixed-point pss counter to minimize division errors. So (pss >>
+ * PSS_SHIFT) would be the real byte count.
+ *
+ * A shift of 12 before division means (assuming 4K page size):
+ * - 1M 3-user-pages add up to 8KB errors;
+ * - supports mapcount up to 2^24, or 16M;
+ * - supports PSS up to 2^52 bytes, or 4PB.
+ */
+#define PSS_SHIFT 12
+
+#ifdef CONFIG_PROC_PAGE_MONITOR
+struct mem_size_stats
+{
+ struct vm_area_struct *vma;
+ unsigned long resident;
+ unsigned long shared_clean;
+ unsigned long shared_dirty;
+ unsigned long private_clean;
+ unsigned long private_dirty;
+ unsigned long referenced;
+ u64 pss;
+};
+
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ void *private)
{
struct mem_size_stats *mss = private;
+ struct vm_area_struct *vma = mss->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
+ int mapcount;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
mss->referenced += PAGE_SIZE;
- if (page_mapcount(page) >= 2) {
+ mapcount = page_mapcount(page);
+ if (mapcount >= 2) {
if (pte_dirty(ptent))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
} else {
if (pte_dirty(ptent))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
+ mss->pss += (PAGE_SIZE << PSS_SHIFT);
}
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
+ return 0;
+}
+
+static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
+
+static int show_smap(struct seq_file *m, void *v)
+{
+ struct vm_area_struct *vma = v;
+ struct mem_size_stats mss;
+ int ret;
+
+ memset(&mss, 0, sizeof mss);
+ mss.vma = vma;
+ if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+ walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+ &smaps_walk, &mss);
+
+ ret = show_map(m, v);
+ if (ret)
+ return ret;
+
+ seq_printf(m,
+ "Size: %8lu kB\n"
+ "Rss: %8lu kB\n"
+ "Pss: %8lu kB\n"
+ "Shared_Clean: %8lu kB\n"
+ "Shared_Dirty: %8lu kB\n"
+ "Private_Clean: %8lu kB\n"
+ "Private_Dirty: %8lu kB\n"
+ "Referenced: %8lu kB\n",
+ (vma->vm_end - vma->vm_start) >> 10,
+ mss.resident >> 10,
+ (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
+ mss.shared_clean >> 10,
+ mss.shared_dirty >> 10,
+ mss.private_clean >> 10,
+ mss.private_dirty >> 10,
+ mss.referenced >> 10);
+
+ return ret;
+}
+
+static const struct seq_operations proc_pid_smaps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_smap
+};
+
+static int smaps_open(struct inode *inode, struct file *file)
+{
+ return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- void *private)
+const struct file_operations proc_smaps_operations = {
+ .open = smaps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, void *private)
{
+ struct vm_area_struct *vma = private;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
@@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
+ return 0;
}
-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
- unsigned long addr, unsigned long end)
+static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
- pmd_t *pmd;
- unsigned long next;
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF], *end;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
- for (pmd = pmd_offset(pud, addr); addr != end;
- pmd++, addr = next) {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- walker->action(walker->vma, pmd, addr, next, walker->private);
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+ if (!simple_strtol(buffer, &end, 0))
+ return -EINVAL;
+ if (*end == '\n')
+ end++;
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+ mm = get_task_mm(task);
+ if (mm) {
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if (!is_vm_hugetlb_page(vma))
+ walk_page_range(mm, vma->vm_start, vma->vm_end,
+ &clear_refs_walk, vma);
+ flush_tlb_mm(mm);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
}
+ put_task_struct(task);
+ if (end - buffer == 0)
+ return -EIO;
+ return end - buffer;
}
-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
+const struct file_operations proc_clear_refs_operations = {
+ .write = clear_refs_write,
+};
- for (pud = pud_offset(pgd, addr); addr != end;
- pud++, addr = next) {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- walk_pmd_range(walker, pud, addr, next);
+struct pagemapread {
+ char __user *out, *end;
+};
+
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_RESERVED_BITS 3
+#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
+#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT PM_SPECIAL(1LL)
+#define PM_SWAP PM_SPECIAL(2LL)
+#define PM_END_OF_BUFFER 1
+
+static int add_to_pagemap(unsigned long addr, u64 pfn,
+ struct pagemapread *pm)
+{
+ /*
+ * Make sure there's room in the buffer for an
+ * entire entry. Otherwise, only copy part of
+ * the pfn.
+ */
+ if (pm->out + PM_ENTRY_BYTES >= pm->end) {
+ if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+ return -EFAULT;
+ pm->out = pm->end;
+ return PM_END_OF_BUFFER;
}
+
+ if (put_user(pfn, pm->out))
+ return -EFAULT;
+ pm->out += PM_ENTRY_BYTES;
+ return 0;
}
-/*
- * walk_page_range - walk the page tables of a VMA with a callback
- * @vma - VMA to walk
- * @action - callback invoked for every bottom-level (PTE) page table
- * @private - private data passed to the callback function
- *
- * Recursively walk the page table for the memory area in a VMA, calling
- * a callback for every bottom-level (PTE) page table.
- */
-static inline void walk_page_range(struct vm_area_struct *vma,
- void (*action)(struct vm_area_struct *,
- pmd_t *, unsigned long,
- unsigned long, void *),
- void *private)
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+ void *private)
{
- unsigned long addr = vma->vm_start;
- unsigned long end = vma->vm_end;
- struct pmd_walker walker = {
- .vma = vma,
- .private = private,
- .action = action,
- };
- pgd_t *pgd;
- unsigned long next;
-
- for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
- pgd++, addr = next) {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- walk_pud_range(&walker, pgd, addr, next);
+ struct pagemapread *pm = private;
+ unsigned long addr;
+ int err = 0;
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+ if (err)
+ break;
}
+ return err;
}
-static int show_smap(struct seq_file *m, void *v)
+u64 swap_pte_to_pagemap_entry(pte_t pte)
{
- struct vm_area_struct *vma = v;
- struct mem_size_stats mss;
-
- memset(&mss, 0, sizeof mss);
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma, smaps_pte_range, &mss);
- return show_map_internal(m, v, &mss);
+ swp_entry_t e = pte_to_swp_entry(pte);
+ return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
}
-void clear_refs_smap(struct mm_struct *mm)
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ void *private)
{
- struct vm_area_struct *vma;
+ struct pagemapread *pm = private;
+ pte_t *pte;
+ int err = 0;
+
+ for (; addr != end; addr += PAGE_SIZE) {
+ u64 pfn = PM_NOT_PRESENT;
+ pte = pte_offset_map(pmd, addr);
+ if (is_swap_pte(*pte))
+ pfn = swap_pte_to_pagemap_entry(*pte);
+ else if (pte_present(*pte))
+ pfn = pte_pfn(*pte);
+ /* unmap so we're not in atomic when we copy to userspace */
+ pte_unmap(pte);
+ err = add_to_pagemap(addr, pfn, pm);
+ if (err)
+ return err;
+ }
- down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma, clear_refs_pte_range, NULL);
- flush_tlb_mm(mm);
- up_read(&mm->mmap_sem);
+ cond_resched();
+
+ return err;
}
-static void *m_start(struct seq_file *m, loff_t *pos)
+static struct mm_walk pagemap_walk = {
+ .pmd_entry = pagemap_pte_range,
+ .pte_hole = pagemap_pte_hole
+};
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one 64-bit
+ * entry representing the corresponding physical page frame number
+ * (PFN) if the page is present. If there is a swap entry for the
+ * physical page, then an encoding of the swap file number and the
+ * page's offset into the swap file are returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * precisely which pages are mapped (or in swap) and comparing mapped
+ * pages between processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
{
- struct proc_maps_private *priv = m->private;
- unsigned long last_addr = m->version;
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ struct page **pages, *page;
+ unsigned long uaddr, uend;
struct mm_struct *mm;
- struct vm_area_struct *vma, *tail_vma = NULL;
- loff_t l = *pos;
-
- /* Clear the per syscall fields in priv */
- priv->task = NULL;
- priv->tail_vma = NULL;
+ struct pagemapread pm;
+ int pagecount;
+ int ret = -ESRCH;
- /*
- * We remember last_addr rather than next_addr to hit with
- * mmap_cache most of the time. We have zero last_addr at
- * the beginning and also after lseek. We will have -1 last_addr
- * after the end of the vmas.
- */
+ if (!task)
+ goto out;
- if (last_addr == -1UL)
- return NULL;
+ ret = -EACCES;
+ if (!ptrace_may_attach(task))
+ goto out;
- priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
- if (!priv->task)
- return NULL;
+ ret = -EINVAL;
+ /* file position must be aligned */
+ if (*ppos % PM_ENTRY_BYTES)
+ goto out;
- mm = mm_for_maps(priv->task);
+ ret = 0;
+ mm = get_task_mm(task);
if (!mm)
- return NULL;
-
- priv->tail_vma = tail_vma = get_gate_vma(priv->task);
-
- /* Start with last addr hint */
- if (last_addr && (vma = find_vma(mm, last_addr))) {
- vma = vma->vm_next;
goto out;
- }
- /*
- * Check the vma index is within the range and do
- * sequential scan until m_index.
- */
- vma = NULL;
- if ((unsigned long)l < mm->map_count) {
- vma = mm->mmap;
- while (l-- && vma)
- vma = vma->vm_next;
- goto out;
- }
+ ret = -ENOMEM;
+ uaddr = (unsigned long)buf & PAGE_MASK;
+ uend = (unsigned long)(buf + count);
+ pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+ pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_task;
- if (l != mm->map_count)
- tail_vma = NULL; /* After gate vma */
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(current, current->mm, uaddr, pagecount,
+ 1, 0, pages, NULL);
+ up_read(&current->mm->mmap_sem);
-out:
- if (vma)
- return vma;
+ if (ret < 0)
+ goto out_free;
- /* End of vmas has been reached */
- m->version = (tail_vma != NULL)? 0: -1UL;
- up_read(&mm->mmap_sem);
- mmput(mm);
- return tail_vma;
-}
+ pm.out = buf;
+ pm.end = buf + count;
-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
- if (vma && vma != priv->tail_vma) {
- struct mm_struct *mm = vma->vm_mm;
- up_read(&mm->mmap_sem);
- mmput(mm);
+ if (!ptrace_may_attach(task)) {
+ ret = -EIO;
+ } else {
+ unsigned long src = *ppos;
+ unsigned long svpfn = src / PM_ENTRY_BYTES;
+ unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+ unsigned long end_vaddr = TASK_SIZE_OF(task);
+
+ /* watch out for wraparound */
+ if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(mm, start_vaddr, end_vaddr,
+ &pagemap_walk, &pm);
+ if (ret == PM_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos += pm.out - buf;
+ if (!ret)
+ ret = pm.out - buf;
}
-}
-
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct vm_area_struct *tail_vma = priv->tail_vma;
-
- (*pos)++;
- if (vma && (vma != tail_vma) && vma->vm_next)
- return vma->vm_next;
- vma_stop(priv, vma);
- return (vma != tail_vma)? tail_vma: NULL;
-}
-
-static void m_stop(struct seq_file *m, void *v)
-{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
-
- vma_stop(priv, vma);
- if (priv->task)
- put_task_struct(priv->task);
-}
-
-static struct seq_operations proc_pid_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_map
-};
-
-static struct seq_operations proc_pid_smaps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_smap
-};
-static int do_maps_open(struct inode *inode, struct file *file,
- struct seq_operations *ops)
-{
- struct proc_maps_private *priv;
- int ret = -ENOMEM;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (priv) {
- priv->pid = proc_pid(inode);
- ret = seq_open(file, ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
- m->private = priv;
- } else {
- kfree(priv);
- }
+ for (; pagecount; pagecount--) {
+ page = pages[pagecount-1];
+ if (!PageReserved(page))
+ SetPageDirty(page);
+ page_cache_release(page);
}
+ mmput(mm);
+out_free:
+ kfree(pages);
+out_task:
+ put_task_struct(task);
+out:
return ret;
}
-static int maps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_pid_maps_op);
-}
-
-const struct file_operations proc_maps_operations = {
- .open = maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
+const struct file_operations proc_pagemap_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_read,
};
+#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
extern int show_numa_map(struct seq_file *m, void *v);
@@ -526,7 +734,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v)
return show_numa_map(m, v);
}
-static struct seq_operations proc_pid_numa_maps_op = {
+static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
@@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = {
.release = seq_release_private,
};
#endif
-
-static int smaps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_pid_smaps_op);
-}
-
-const struct file_operations proc_smaps_operations = {
- .open = smaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1932c2ca345..8011528518b 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -12,7 +12,7 @@
* each process that owns it. Non-shared memory is counted
* accurately.
*/
-char *task_mem(struct mm_struct *mm, char *buffer)
+void task_mem(struct seq_file *m, struct mm_struct *mm)
{
struct vm_list_struct *vml;
unsigned long bytes = 0, sbytes = 0, slack = 0;
@@ -58,14 +58,13 @@ char *task_mem(struct mm_struct *mm, char *buffer)
bytes += kobjsize(current); /* includes kernel stack */
- buffer += sprintf(buffer,
+ seq_printf(m,
"Mem:\t%8lu bytes\n"
"Slack:\t%8lu bytes\n"
"Shared:\t%8lu bytes\n",
bytes, slack, sbytes);
up_read(&mm->mmap_sem);
- return buffer;
}
unsigned long task_vsize(struct mm_struct *mm)
@@ -104,7 +103,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
-int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+int proc_exe_link(struct inode *inode, struct path *path)
{
struct vm_list_struct *vml;
struct vm_area_struct *vma;
@@ -127,8 +126,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
}
if (vma) {
- *mnt = mntget(vma->vm_file->f_path.mnt);
- *dentry = dget(vma->vm_file->f_path.dentry);
+ *path = vma->vm_file->f_path;
+ path_get(&vma->vm_file->f_path);
result = 0;
}
@@ -199,7 +198,7 @@ static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
return vml ? vml->next : NULL;
}
-static struct seq_operations proc_pid_maps_ops = {
+static const struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 523e1098ae8..9ac0f5e064e 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -10,7 +10,6 @@
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/highmem.h>