From f30c53a873d0d227493197064b8886af2d57bbd6 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Mon, 15 Jan 2007 20:22:02 +0100
Subject: MODULES: add the module name for built in kernel drivers

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/module.c | 33 ++++++++++++++++++++++++---------
 kernel/params.c | 12 +++++-------
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index d0f2260a021..0f4489af3e2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1131,8 +1131,8 @@ static int mod_sysfs_setup(struct module *mod,
 	if (err)
 		goto out;
 
-	mod->drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
-	if (!mod->drivers_dir) {
+	mod->mkobj.drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
+	if (!mod->mkobj.drivers_dir) {
 		err = -ENOMEM;
 		goto out_unreg;
 	}
@@ -1151,7 +1151,7 @@ static int mod_sysfs_setup(struct module *mod,
 out_unreg_param:
 	module_param_sysfs_remove(mod);
 out_unreg_drivers:
-	kobject_unregister(mod->drivers_dir);
+	kobject_unregister(mod->mkobj.drivers_dir);
 out_unreg:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
@@ -1163,7 +1163,7 @@ static void mod_kobject_remove(struct module *mod)
 {
 	module_remove_modinfo_attrs(mod);
 	module_param_sysfs_remove(mod);
-	kobject_unregister(mod->drivers_dir);
+	kobject_unregister(mod->mkobj.drivers_dir);
 
 	kobject_unregister(&mod->mkobj.kobj);
 }
@@ -2344,15 +2344,30 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 {
 	char *driver_name;
 	int no_warn;
+	struct module_kobject *mk = NULL;
 
-	if (!mod || !drv)
+	if (!drv)
+		return;
+
+	if (mod)
+		mk = &mod->mkobj;
+	else if (drv->mod_name) {
+		struct kobject *mkobj;
+
+		/* Look up the built-in module entry in /sys/module */
+		mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
+		if (mkobj)
+			mk = container_of(mkobj, struct module_kobject, kobj);
+	}
+
+	if (!mk)
 		return;
 
 	/* Don't check return codes; these calls are idempotent */
-	no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
+	no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module");
 	driver_name = make_driver_name(drv);
 	if (driver_name) {
-		no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj,
+		no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj,
 					    driver_name);
 		kfree(driver_name);
 	}
@@ -2367,10 +2382,10 @@ void module_remove_driver(struct device_driver *drv)
 		return;
 
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->drivers_dir) {
+	if (drv->owner && drv->owner->mkobj.drivers_dir) {
 		driver_name = make_driver_name(drv);
 		if (driver_name) {
-			sysfs_remove_link(drv->owner->drivers_dir,
+			sysfs_remove_link(drv->owner->mkobj.drivers_dir,
 					  driver_name);
 			kfree(driver_name);
 		}
diff --git a/kernel/params.c b/kernel/params.c
index 718945da8f5..737b7c5e93a 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -561,14 +561,12 @@ static void __init kernel_param_sysfs_setup(const char *name,
 	mk->mod = THIS_MODULE;
 	kobj_set_kset_s(mk, module_subsys);
 	kobject_set_name(&mk->kobj, name);
-	ret = kobject_register(&mk->kobj);
+	kobject_init(&mk->kobj);
+	ret = kobject_add(&mk->kobj);
 	BUG_ON(ret < 0);
-
-	/* no need to keep the kobject if no parameter is exported */
-	if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) {
-		kobject_unregister(&mk->kobj);
-		kfree(mk);
-	}
+	param_sysfs_setup(mk, kparam, num_params, name_skip);
+	mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers");
+	kobject_uevent(&mk->kobj, KOBJ_ADD);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From fe480a2675ed236af396597d9f05245c7bbd0149 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 15 Jan 2007 11:50:02 -0800
Subject: Modules: only add drivers/ directory if needed

This changes the module core to only create the drivers/ directory if we
are going to put something in it.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/module.c | 20 +++++++++++---------
 kernel/params.c |  1 -
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index 0f4489af3e2..9de4209f6a6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1131,12 +1131,6 @@ static int mod_sysfs_setup(struct module *mod,
 	if (err)
 		goto out;
 
-	mod->mkobj.drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
-	if (!mod->mkobj.drivers_dir) {
-		err = -ENOMEM;
-		goto out_unreg;
-	}
-
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
 		goto out_unreg_drivers;
@@ -1151,8 +1145,6 @@ static int mod_sysfs_setup(struct module *mod,
 out_unreg_param:
 	module_param_sysfs_remove(mod);
 out_unreg_drivers:
-	kobject_unregister(mod->mkobj.drivers_dir);
-out_unreg:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
 out:
@@ -1163,7 +1155,8 @@ static void mod_kobject_remove(struct module *mod)
 {
 	module_remove_modinfo_attrs(mod);
 	module_param_sysfs_remove(mod);
-	kobject_unregister(mod->mkobj.drivers_dir);
+	if (mod->mkobj.drivers_dir)
+		kobject_unregister(mod->mkobj.drivers_dir);
 
 	kobject_unregister(&mod->mkobj.kobj);
 }
@@ -2340,6 +2333,14 @@ static char *make_driver_name(struct device_driver *drv)
 	return driver_name;
 }
 
+static void module_create_drivers_dir(struct module_kobject *mk)
+{
+	if (!mk || mk->drivers_dir)
+		return;
+
+	mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers");
+}
+
 void module_add_driver(struct module *mod, struct device_driver *drv)
 {
 	char *driver_name;
@@ -2367,6 +2368,7 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 	no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module");
 	driver_name = make_driver_name(drv);
 	if (driver_name) {
+		module_create_drivers_dir(mk);
 		no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj,
 					    driver_name);
 		kfree(driver_name);
diff --git a/kernel/params.c b/kernel/params.c
index 737b7c5e93a..cbaac85942d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -565,7 +565,6 @@ static void __init kernel_param_sysfs_setup(const char *name,
 	ret = kobject_add(&mk->kobj);
 	BUG_ON(ret < 0);
 	param_sysfs_setup(mk, kparam, num_params, name_skip);
-	mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers");
 	kobject_uevent(&mk->kobj, KOBJ_ADD);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 270a6c4cad809e92d7b81adde92d0b3d94eeb8ee Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 18 Jan 2007 13:26:15 +0100
Subject: /sys/module/*/holders

  /sys/module/usbcore/
  |-- drivers
  |   |-- usb:hub -> ../../../subsystem/usb/drivers/hub
  |   |-- usb:usb -> ../../../subsystem/usb/drivers/usb
  |   `-- usb:usbfs -> ../../../subsystem/usb/drivers/usbfs
  |-- holders
  |   |-- ehci_hcd -> ../../../module/ehci_hcd
  |   |-- uhci_hcd -> ../../../module/uhci_hcd
  |   |-- usb_storage -> ../../../module/usb_storage
  |   `-- usbhid -> ../../../module/usbhid
  |-- initstate

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/module.h |  1 +
 kernel/module.c        | 37 +++++++++++++++++++++++++++++++------
 kernel/params.c        | 17 ++++++++++++++++-
 3 files changed, 48 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/module.h b/include/linux/module.h
index 90dc2542978..419d3ef293d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -264,6 +264,7 @@ struct module
 	struct module_attribute *modinfo_attrs;
 	const char *version;
 	const char *srcversion;
+	struct kobject *holders_dir;
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
diff --git a/kernel/module.c b/kernel/module.c
index 9de4209f6a6..8a94e054230 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -537,6 +537,8 @@ static int already_uses(struct module *a, struct module *b)
 static int use_module(struct module *a, struct module *b)
 {
 	struct module_use *use;
+	int no_warn;
+
 	if (b == NULL || already_uses(a, b)) return 1;
 
 	if (!strong_try_module_get(b))
@@ -552,6 +554,7 @@ static int use_module(struct module *a, struct module *b)
 
 	use->module_which_uses = a;
 	list_add(&use->list, &b->modules_which_use_me);
+	no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
 	return 1;
 }
 
@@ -569,6 +572,7 @@ static void module_unload_free(struct module *mod)
 				module_put(i);
 				list_del(&use->list);
 				kfree(use);
+				sysfs_remove_link(i->holders_dir, mod->name);
 				/* There can be at most one match. */
 				break;
 			}
@@ -1106,9 +1110,7 @@ static void module_remove_modinfo_attrs(struct module *mod)
 	kfree(mod->modinfo_attrs);
 }
 
-static int mod_sysfs_setup(struct module *mod,
-			   struct kernel_param *kparam,
-			   unsigned int num_params)
+static int mod_sysfs_init(struct module *mod)
 {
 	int err;
 
@@ -1125,15 +1127,30 @@ static int mod_sysfs_setup(struct module *mod,
 	kobj_set_kset_s(&mod->mkobj, module_subsys);
 	mod->mkobj.mod = mod;
 
-	/* delay uevent until full sysfs population */
 	kobject_init(&mod->mkobj.kobj);
+
+out:
+	return err;
+}
+
+static int mod_sysfs_setup(struct module *mod,
+			   struct kernel_param *kparam,
+			   unsigned int num_params)
+{
+	int err;
+
+	/* delay uevent until full sysfs population */
 	err = kobject_add(&mod->mkobj.kobj);
 	if (err)
 		goto out;
 
+	mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders");
+	if (!mod->holders_dir)
+		goto out_unreg;
+
 	err = module_param_sysfs_setup(mod, kparam, num_params);
 	if (err)
-		goto out_unreg_drivers;
+		goto out_unreg_holders;
 
 	err = module_add_modinfo_attrs(mod);
 	if (err)
@@ -1144,7 +1161,9 @@ static int mod_sysfs_setup(struct module *mod,
 
 out_unreg_param:
 	module_param_sysfs_remove(mod);
-out_unreg_drivers:
+out_unreg_holders:
+	kobject_unregister(mod->holders_dir);
+out_unreg:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
 out:
@@ -1157,6 +1176,8 @@ static void mod_kobject_remove(struct module *mod)
 	module_param_sysfs_remove(mod);
 	if (mod->mkobj.drivers_dir)
 		kobject_unregister(mod->mkobj.drivers_dir);
+	if (mod->holders_dir)
+		kobject_unregister(mod->holders_dir);
 
 	kobject_unregister(&mod->mkobj.kobj);
 }
@@ -1761,6 +1782,10 @@ static struct module *load_module(void __user *umod,
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
+	/* Initialize kobject, so we can reference it. */
+	if (mod_sysfs_init(mod) != 0)
+		goto cleanup;
+
 	/* Set up license info based on the info section */
 	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
diff --git a/kernel/params.c b/kernel/params.c
index cbaac85942d..553cf7d6a4b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -30,6 +30,8 @@
 #define DEBUGP(fmt, a...)
 #endif
 
+static struct kobj_type module_ktype;
+
 static inline char dash2underscore(char c)
 {
 	if (c == '-')
@@ -671,6 +673,19 @@ static struct sysfs_ops module_sysfs_ops = {
 	.store = module_attr_store,
 };
 
+static int uevent_filter(struct kset *kset, struct kobject *kobj)
+{
+	struct kobj_type *ktype = get_ktype(kobj);
+
+	if (ktype == &module_ktype)
+		return 1;
+	return 0;
+}
+
+static struct kset_uevent_ops module_uevent_ops = {
+	.filter = uevent_filter,
+};
+
 #else
 static struct sysfs_ops module_sysfs_ops = {
 	.show = NULL,
@@ -682,7 +697,7 @@ static struct kobj_type module_ktype = {
 	.sysfs_ops =	&module_sysfs_ops,
 };
 
-decl_subsys(module, &module_ktype, NULL);
+decl_subsys(module, &module_ktype, &module_uevent_ops);
 
 /*
  * param_sysfs_init - wrapper for built-in params support
-- 
cgit v1.2.3-70-g09d2


From 5b912c108c8b1fcecbfe13d6d9a183db97b682d3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 28 Jan 2007 12:52:03 -0700
Subject: msi: Kill the msi_desc array.

We need to be able to get from an irq number to a struct msi_desc.
The msi_desc array in msi.c had several shortcomings; the big one was
that it could not be used outside of msi.c.  Using irq_data in struct
irq_desc almost worked, except that on some architectures irq_data needs
to be used for something else.

So this patch adds a msi_desc pointer to irq_desc, adds the appropriate
wrappers and changes all of the msi code to use them.

The dynamic_irq_init/cleanup code was tweaked to ensure the new
field is left in a well defined state.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/ia64/sn/kernel/msi_sn.c |  2 +-
 drivers/pci/msi.c            | 44 +++++++++++++++++++++-----------------------
 include/linux/irq.h          |  4 ++++
 kernel/irq/chip.c            | 28 ++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 24 deletions(-)

(limited to 'kernel')

diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index b3a435fd70f..31fbb859b67 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -74,7 +74,7 @@ int sn_setup_msi_irq(unsigned int irq, struct pci_dev *pdev)
 	struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
-	entry = get_irq_data(irq);
+	entry = get_irq_msi(irq);
 	if (!entry->msi_attrib.is_64)
 		return -EINVAL;
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 55fe83dfd77..52c253c5ad3 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -24,7 +24,6 @@
 #include "pci.h"
 #include "msi.h"
 
-static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
 static struct kmem_cache* msi_cachep;
 
 static int pci_msi_enable = 1;
@@ -43,7 +42,7 @@ static void msi_set_mask_bit(unsigned int irq, int flag)
 {
 	struct msi_desc *entry;
 
-	entry = msi_desc[irq];
+	entry = get_irq_msi(irq);
 	BUG_ON(!entry || !entry->dev);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
@@ -73,7 +72,7 @@ static void msi_set_mask_bit(unsigned int irq, int flag)
 
 void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_data(irq);
+	struct msi_desc *entry = get_irq_msi(irq);
 	switch(entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -112,7 +111,7 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 
 void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_data(irq);
+	struct msi_desc *entry = get_irq_msi(irq);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -208,7 +207,7 @@ static int create_msi_irq(void)
 		return -EBUSY;
 	}
 
-	set_irq_data(irq, entry);
+	set_irq_msi(irq, entry);
 
 	return irq;
 }
@@ -217,9 +216,9 @@ static void destroy_msi_irq(unsigned int irq)
 {
 	struct msi_desc *entry;
 
-	entry = get_irq_data(irq);
+	entry = get_irq_msi(irq);
 	set_irq_chip(irq, NULL);
-	set_irq_data(irq, NULL);
+	set_irq_msi(irq, NULL);
 	destroy_irq(irq);
 	kmem_cache_free(msi_cachep, entry);
 }
@@ -360,10 +359,10 @@ static int __pci_save_msix_state(struct pci_dev *dev)
 	while (head != tail) {
 		struct msi_desc *entry;
 
-		entry = msi_desc[irq];
+		entry = get_irq_msi(irq);
 		read_msi_msg(irq, &entry->msg_save);
 
-		tail = msi_desc[irq]->link.tail;
+		tail = entry->link.tail;
 		irq = tail;
 	}
 
@@ -410,10 +409,10 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
 	/* route the table */
 	irq = head = dev->first_msi_irq;
 	while (head != tail) {
-		entry = msi_desc[irq];
+		entry = get_irq_msi(irq);
 		write_msi_msg(irq, &entry->msg_save);
 
-		tail = msi_desc[irq]->link.tail;
+		tail = entry->link.tail;
 		irq = tail;
 	}
 
@@ -451,7 +450,7 @@ static int msi_capability_init(struct pci_dev *dev)
 	if (irq < 0)
 		return irq;
 
-	entry = get_irq_data(irq);
+	entry = get_irq_msi(irq);
 	entry->link.head = irq;
 	entry->link.tail = irq;
 	entry->msi_attrib.type = PCI_CAP_ID_MSI;
@@ -486,7 +485,7 @@ static int msi_capability_init(struct pci_dev *dev)
 	}
 
 	dev->first_msi_irq = irq;
-	msi_desc[irq] = entry;
+	set_irq_msi(irq, entry);
 	/* Set MSI enabled bits	 */
 	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
 
@@ -535,7 +534,7 @@ static int msix_capability_init(struct pci_dev *dev,
 		if (irq < 0)
 			break;
 
-		entry = get_irq_data(irq);
+		entry = get_irq_msi(irq);
  		j = entries[i].entry;
  		entries[i].vector = irq;
 		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
@@ -565,7 +564,7 @@ static int msix_capability_init(struct pci_dev *dev,
 			break;
 		}
 
-		msi_desc[irq] = entry;
+		set_irq_msi(irq, entry);
 	}
 	if (i != nvec) {
 		int avail = i - 1;
@@ -682,7 +681,7 @@ void pci_disable_msi(struct pci_dev* dev)
 
 	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
 
-	entry = msi_desc[dev->first_msi_irq];
+	entry = get_irq_msi(dev->first_msi_irq);
 	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
 		return;
 	}
@@ -709,7 +708,7 @@ static int msi_free_irq(struct pci_dev* dev, int irq)
 
 	arch_teardown_msi_irq(irq);
 
-	entry = msi_desc[irq];
+	entry = get_irq_msi(irq);
 	if (!entry || entry->dev != dev) {
 		return -EINVAL;
 	}
@@ -717,10 +716,9 @@ static int msi_free_irq(struct pci_dev* dev, int irq)
 	entry_nr = entry->msi_attrib.entry_nr;
 	head = entry->link.head;
 	base = entry->mask_base;
-	msi_desc[entry->link.head]->link.tail = entry->link.tail;
-	msi_desc[entry->link.tail]->link.head = entry->link.head;
+	get_irq_msi(entry->link.head)->link.tail = entry->link.tail;
+	get_irq_msi(entry->link.tail)->link.head = entry->link.head;
 	entry->dev = NULL;
-	msi_desc[irq] = NULL;
 
 	destroy_msi_irq(irq);
 
@@ -821,7 +819,7 @@ void pci_disable_msix(struct pci_dev* dev)
 
 	irq = head = dev->first_msi_irq;
 	while (head != tail) {
-		tail = msi_desc[irq]->link.tail;
+		tail = get_irq_msi(irq)->link.tail;
 		if (irq_has_action(irq))
 			warning = 1;
 		else if (irq != head)	/* Release MSI-X irq */
@@ -867,8 +865,8 @@ void msi_remove_pci_irq_vectors(struct pci_dev* dev)
 
 		irq = head = dev->first_msi_irq;
 		while (head != tail) {
-			tail = msi_desc[irq]->link.tail;
-			base = msi_desc[irq]->mask_base;
+			tail = get_irq_msi(irq)->link.tail;
+			base = get_irq_msi(irq)->mask_base;
 			if (irq_has_action(irq))
 				warning = 1;
 			else if (irq != head) /* Release MSI-X irq */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 52fc4052a0a..5504b671357 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -68,6 +68,7 @@ typedef	void fastcall (*irq_flow_handler_t)(unsigned int irq,
 #define IRQ_MOVE_PENDING	0x40000000	/* need to re-target IRQ destination */
 
 struct proc_dir_entry;
+struct msi_desc;
 
 /**
  * struct irq_chip - hardware interrupt chip descriptor
@@ -148,6 +149,7 @@ struct irq_chip {
 struct irq_desc {
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
+	struct msi_desc		*msi_desc;
 	void			*handler_data;
 	void			*chip_data;
 	struct irqaction	*action;	/* IRQ action list */
@@ -373,10 +375,12 @@ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
 extern int set_irq_data(unsigned int irq, void *data);
 extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
+extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
 #define get_irq_chip(irq)	(irq_desc[irq].chip)
 #define get_irq_chip_data(irq)	(irq_desc[irq].chip_data)
 #define get_irq_data(irq)	(irq_desc[irq].handler_data)
+#define get_irq_msi(irq)	(irq_desc[irq].msi_desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d27b2585574..475e8a71bcd 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -39,6 +39,7 @@ void dynamic_irq_init(unsigned int irq)
 	desc->chip = &no_irq_chip;
 	desc->handle_irq = handle_bad_irq;
 	desc->depth = 1;
+	desc->msi_desc = NULL;
 	desc->handler_data = NULL;
 	desc->chip_data = NULL;
 	desc->action = NULL;
@@ -74,6 +75,9 @@ void dynamic_irq_cleanup(unsigned int irq)
 		WARN_ON(1);
 		return;
 	}
+	desc->msi_desc = NULL;
+	desc->handler_data = NULL;
+	desc->chip_data = NULL;
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -161,6 +165,30 @@ int set_irq_data(unsigned int irq, void *data)
 }
 EXPORT_SYMBOL(set_irq_data);
 
+/**
+ *	set_irq_msi - set the MSI descriptor for an irq
+ *	@irq:	Interrupt number
+ *	@entry:	Pointer to the MSI descriptor for this interrupt
+ *
+ *	Store the MSI descriptor pointer in the irq descriptor of @irq
+ */
+int set_irq_msi(unsigned int irq, struct msi_desc *entry)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR
+		       "Trying to install msi data for IRQ%d\n", irq);
+		return -EINVAL;
+	}
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock, flags);
+	desc->msi_desc = entry;
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
 /**
  *	set_irq_chip_data - set irq chip data for an irq
  *	@irq:	Interrupt number
-- 
cgit v1.2.3-70-g09d2


From 7726942fb15edd46e4fe8ab37f9a99795191e585 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 9 Feb 2007 17:08:57 +0000
Subject: [APM] Add shared version of APM emulation

Currently ARM and MIPS both have nearly identical copies of the APM
emulation code in their arch code.  Add yet another copy of it to
drivers/char and make it selectable through SYS_SUPPORTS_APM_EMULATION.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/char/Makefile         |   2 +
 drivers/char/apm-emulation.c  | 672 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/apm-emulation.h |  62 ++++
 kernel/power/Kconfig          |  26 ++
 4 files changed, 762 insertions(+)
 create mode 100644 drivers/char/apm-emulation.c
 create mode 100644 include/linux/apm-emulation.h

(limited to 'kernel')

diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 0326ca1a848..ae8567cc529 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -60,6 +60,8 @@ obj-$(CONFIG_BRIQ_PANEL)	+= briq_panel.o
 obj-$(CONFIG_PRINTER)		+= lp.o
 obj-$(CONFIG_TIPAR)		+= tipar.o
 
+obj-$(CONFIG_APM_EMULATION)	+= apm-emulation.o
+
 obj-$(CONFIG_DTLK)		+= dtlk.o
 obj-$(CONFIG_R3964)		+= n_r3964.o
 obj-$(CONFIG_APPLICOM)		+= applicom.o
diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c
new file mode 100644
index 00000000000..179c7a3b6e7
--- /dev/null
+++ b/drivers/char/apm-emulation.c
@@ -0,0 +1,672 @@
+/*
+ * bios-less APM driver for ARM Linux
+ *  Jamey Hicks <jamey@crl.dec.com>
+ *  adapted from the APM BIOS driver for Linux by Stephen Rothwell (sfr@linuxcare.com)
+ *
+ * APM 1.2 Reference:
+ *   Intel Corporation, Microsoft Corporation. Advanced Power Management
+ *   (APM) BIOS Interface Specification, Revision 1.2, February 1996.
+ *
+ * [This document is available from Microsoft at:
+ *    http://www.microsoft.com/hwdev/busbios/amp_12.htm]
+ */
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/miscdevice.h>
+#include <linux/apm_bios.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/pm.h>
+#include <linux/apm-emulation.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#include <asm/system.h>
+
+/*
+ * The apm_bios device is one of the misc char devices.
+ * This is its minor number.
+ */
+#define APM_MINOR_DEV	134
+
+/*
+ * See Documentation/Config.help for the configuration options.
+ *
+ * Various options can be changed at boot time as follows:
+ * (We allow underscores for compatibility with the modules code)
+ *	apm=on/off			enable/disable APM
+ */
+
+/*
+ * Maximum number of events stored
+ */
+#define APM_MAX_EVENTS		16
+
+struct apm_queue {
+	unsigned int		event_head;
+	unsigned int		event_tail;
+	apm_event_t		events[APM_MAX_EVENTS];
+};
+
+/*
+ * The per-file APM data
+ */
+struct apm_user {
+	struct list_head	list;
+
+	unsigned int		suser: 1;
+	unsigned int		writer: 1;
+	unsigned int		reader: 1;
+
+	int			suspend_result;
+	unsigned int		suspend_state;
+#define SUSPEND_NONE	0		/* no suspend pending */
+#define SUSPEND_PENDING	1		/* suspend pending read */
+#define SUSPEND_READ	2		/* suspend read, pending ack */
+#define SUSPEND_ACKED	3		/* suspend acked */
+#define SUSPEND_WAIT	4		/* waiting for suspend */
+#define SUSPEND_DONE	5		/* suspend completed */
+
+	struct apm_queue	queue;
+};
+
+/*
+ * Local variables
+ */
+static int suspends_pending;
+static int apm_disabled;
+static struct task_struct *kapmd_tsk;
+
+static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
+static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
+
+/*
+ * This is a list of everyone who has opened /dev/apm_bios
+ */
+static DECLARE_RWSEM(user_list_lock);
+static LIST_HEAD(apm_user_list);
+
+/*
+ * kapmd info.  kapmd provides us a process context to handle
+ * "APM" events within - specifically necessary if we're going
+ * to be suspending the system.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(kapmd_wait);
+static DEFINE_SPINLOCK(kapmd_queue_lock);
+static struct apm_queue kapmd_queue;
+
+static DEFINE_MUTEX(state_lock);
+
+static const char driver_version[] = "1.13";	/* no spaces */
+
+
+
+/*
+ * Compatibility cruft until the IPAQ people move over to the new
+ * interface.
+ */
+static void __apm_get_power_status(struct apm_power_info *info)
+{
+}
+
+/*
+ * This allows machines to provide their own "apm get power status" function.
+ */
+void (*apm_get_power_status)(struct apm_power_info *) = __apm_get_power_status;
+EXPORT_SYMBOL(apm_get_power_status);
+
+
+/*
+ * APM event queue management.
+ */
+static inline int queue_empty(struct apm_queue *q)
+{
+	return q->event_head == q->event_tail;
+}
+
+static inline apm_event_t queue_get_event(struct apm_queue *q)
+{
+	q->event_tail = (q->event_tail + 1) % APM_MAX_EVENTS;
+	return q->events[q->event_tail];
+}
+
+static void queue_add_event(struct apm_queue *q, apm_event_t event)
+{
+	q->event_head = (q->event_head + 1) % APM_MAX_EVENTS;
+	if (q->event_head == q->event_tail) {
+		static int notified;
+
+		if (notified++ == 0)
+		    printk(KERN_ERR "apm: an event queue overflowed\n");
+		q->event_tail = (q->event_tail + 1) % APM_MAX_EVENTS;
+	}
+	q->events[q->event_head] = event;
+}
+
+static void queue_event(apm_event_t event)
+{
+	struct apm_user *as;
+
+	down_read(&user_list_lock);
+	list_for_each_entry(as, &apm_user_list, list) {
+		if (as->reader)
+			queue_add_event(&as->queue, event);
+	}
+	up_read(&user_list_lock);
+	wake_up_interruptible(&apm_waitqueue);
+}
+
+/*
+ * queue_suspend_event - queue an APM suspend event.
+ *
+ * Check that we're in a state where we can suspend.  If not,
+ * return -EBUSY.  Otherwise, queue an event to all "writer"
+ * users.  If there are no "writer" users, return '1' to
+ * indicate that we can immediately suspend.
+ */
+static int queue_suspend_event(apm_event_t event, struct apm_user *sender)
+{
+	struct apm_user *as;
+	int ret = 1;
+
+	mutex_lock(&state_lock);
+	down_read(&user_list_lock);
+
+	/*
+	 * If a thread is still processing, we can't suspend, so reject
+	 * the request.
+	 */
+	list_for_each_entry(as, &apm_user_list, list) {
+		if (as != sender && as->reader && as->writer && as->suser &&
+		    as->suspend_state != SUSPEND_NONE) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_for_each_entry(as, &apm_user_list, list) {
+		if (as != sender && as->reader && as->writer && as->suser) {
+			as->suspend_state = SUSPEND_PENDING;
+			suspends_pending++;
+			queue_add_event(&as->queue, event);
+			ret = 0;
+		}
+	}
+ out:
+	up_read(&user_list_lock);
+	mutex_unlock(&state_lock);
+	wake_up_interruptible(&apm_waitqueue);
+	return ret;
+}
+
+static void apm_suspend(void)
+{
+	struct apm_user *as;
+	int err = pm_suspend(PM_SUSPEND_MEM);
+
+	/*
+	 * Anyone on the APM queues will think we're still suspended.
+	 * Send a message so everyone knows we're now awake again.
+	 */
+	queue_event(APM_NORMAL_RESUME);
+
+	/*
+	 * Finally, wake up anyone who is sleeping on the suspend.
+	 */
+	mutex_lock(&state_lock);
+	down_read(&user_list_lock);
+	list_for_each_entry(as, &apm_user_list, list) {
+		if (as->suspend_state == SUSPEND_WAIT ||
+		    as->suspend_state == SUSPEND_ACKED) {
+			as->suspend_result = err;
+			as->suspend_state = SUSPEND_DONE;
+		}
+	}
+	up_read(&user_list_lock);
+	mutex_unlock(&state_lock);
+
+	wake_up(&apm_suspend_waitqueue);
+}
+
+static ssize_t apm_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct apm_user *as = fp->private_data;
+	apm_event_t event;
+	int i = count, ret = 0;
+
+	if (count < sizeof(apm_event_t))
+		return -EINVAL;
+
+	if (queue_empty(&as->queue) && fp->f_flags & O_NONBLOCK)
+		return -EAGAIN;
+
+	wait_event_interruptible(apm_waitqueue, !queue_empty(&as->queue));
+
+	while ((i >= sizeof(event)) && !queue_empty(&as->queue)) {
+		event = queue_get_event(&as->queue);
+
+		ret = -EFAULT;
+		if (copy_to_user(buf, &event, sizeof(event)))
+			break;
+
+		mutex_lock(&state_lock);
+		if (as->suspend_state == SUSPEND_PENDING &&
+		    (event == APM_SYS_SUSPEND || event == APM_USER_SUSPEND))
+			as->suspend_state = SUSPEND_READ;
+		mutex_unlock(&state_lock);
+
+		buf += sizeof(event);
+		i -= sizeof(event);
+	}
+
+	if (i < count)
+		ret = count - i;
+
+	return ret;
+}
+
+static unsigned int apm_poll(struct file *fp, poll_table * wait)
+{
+	struct apm_user *as = fp->private_data;
+
+	poll_wait(fp, &apm_waitqueue, wait);
+	return queue_empty(&as->queue) ? 0 : POLLIN | POLLRDNORM;
+}
+
+/*
+ * apm_ioctl - handle APM ioctl
+ *
+ * APM_IOC_SUSPEND
+ *   This IOCTL is overloaded, and performs two functions.  It is used to:
+ *     - initiate a suspend
+ *     - acknowledge a suspend read from /dev/apm_bios.
+ *   Only when everyone who has opened /dev/apm_bios with write permission
+ *   has acknowledged does the actual suspend happen.
+ */
+static int
+apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg)
+{
+	struct apm_user *as = filp->private_data;
+	unsigned long flags;
+	int err = -EINVAL;
+
+	if (!as->suser || !as->writer)
+		return -EPERM;
+
+	switch (cmd) {
+	case APM_IOC_SUSPEND:
+		mutex_lock(&state_lock);
+
+		as->suspend_result = -EINTR;
+
+		if (as->suspend_state == SUSPEND_READ) {
+			int pending;
+
+			/*
+			 * If we read a suspend command from /dev/apm_bios,
+			 * then the corresponding APM_IOC_SUSPEND ioctl is
+			 * interpreted as an acknowledge.
+			 */
+			as->suspend_state = SUSPEND_ACKED;
+			suspends_pending--;
+			pending = suspends_pending == 0;
+			mutex_unlock(&state_lock);
+
+			/*
+			 * If there are no further acknowledges required,
+			 * suspend the system.
+			 */
+			if (pending)
+				apm_suspend();
+
+			/*
+			 * Wait for the suspend/resume to complete.  If there
+			 * are pending acknowledges, we wait here for them.
+			 *
+			 * Note: we need to ensure that the PM subsystem does
+			 * not kick us out of the wait when it suspends the
+			 * threads.
+			 */
+			flags = current->flags;
+			current->flags |= PF_NOFREEZE;
+
+			wait_event(apm_suspend_waitqueue,
+				   as->suspend_state == SUSPEND_DONE);
+		} else {
+			as->suspend_state = SUSPEND_WAIT;
+			mutex_unlock(&state_lock);
+
+			/*
+			 * Otherwise it is a request to suspend the system.
+			 * Queue an event for all readers, and expect an
+			 * acknowledge from all writers who haven't already
+			 * acknowledged.
+			 */
+			err = queue_suspend_event(APM_USER_SUSPEND, as);
+			if (err < 0) {
+				/*
+				 * Avoid taking the lock here - this
+				 * should be fine.
+				 */
+				as->suspend_state = SUSPEND_NONE;
+				break;
+			}
+
+			if (err > 0)
+				apm_suspend();
+
+			/*
+			 * Wait for the suspend/resume to complete.  If there
+			 * are pending acknowledges, we wait here for them.
+			 *
+			 * Note: we need to ensure that the PM subsystem does
+			 * not kick us out of the wait when it suspends the
+			 * threads.
+			 */
+			flags = current->flags;
+			current->flags |= PF_NOFREEZE;
+
+			wait_event_interruptible(apm_suspend_waitqueue,
+					 as->suspend_state == SUSPEND_DONE);
+		}
+
+		current->flags = flags;
+
+		mutex_lock(&state_lock);
+		err = as->suspend_result;
+		as->suspend_state = SUSPEND_NONE;
+		mutex_unlock(&state_lock);
+		break;
+	}
+
+	return err;
+}
+
+static int apm_release(struct inode * inode, struct file * filp)
+{
+	struct apm_user *as = filp->private_data;
+	int pending = 0;
+
+	filp->private_data = NULL;
+
+	down_write(&user_list_lock);
+	list_del(&as->list);
+	up_write(&user_list_lock);
+
+	/*
+	 * We are now unhooked from the chain.  As far as new
+	 * events are concerned, we no longer exist.  However, we
+	 * need to balance suspends_pending, which means the
+	 * possibility of sleeping.
+	 */
+	mutex_lock(&state_lock);
+	if (as->suspend_state != SUSPEND_NONE) {
+		suspends_pending -= 1;
+		pending = suspends_pending == 0;
+	}
+	mutex_unlock(&state_lock);
+	if (pending)
+		apm_suspend();
+
+	kfree(as);
+	return 0;
+}
+
+static int apm_open(struct inode * inode, struct file * filp)
+{
+	struct apm_user *as;
+
+	as = kzalloc(sizeof(*as), GFP_KERNEL);
+	if (as) {
+		/*
+		 * XXX - this is a tiny bit broken, when we consider BSD
+		 * process accounting. If the device is opened by root, we
+		 * instantly flag that we used superuser privs. Who knows,
+		 * we might close the device immediately without doing a
+		 * privileged operation -- cevans
+		 */
+		as->suser = capable(CAP_SYS_ADMIN);
+		as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
+		as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
+
+		down_write(&user_list_lock);
+		list_add(&as->list, &apm_user_list);
+		up_write(&user_list_lock);
+
+		filp->private_data = as;
+	}
+
+	return as ? 0 : -ENOMEM;
+}
+
+static struct file_operations apm_bios_fops = {
+	.owner		= THIS_MODULE,
+	.read		= apm_read,
+	.poll		= apm_poll,
+	.ioctl		= apm_ioctl,
+	.open		= apm_open,
+	.release	= apm_release,
+};
+
+static struct miscdevice apm_device = {
+	.minor		= APM_MINOR_DEV,
+	.name		= "apm_bios",
+	.fops		= &apm_bios_fops
+};
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Arguments, with symbols from linux/apm_bios.h.
+ *
+ *   0) Linux driver version (this will change if format changes)
+ *   1) APM BIOS Version.  Usually 1.0, 1.1 or 1.2.
+ *   2) APM flags from APM Installation Check (0x00):
+ *	bit 0: APM_16_BIT_SUPPORT
+ *	bit 1: APM_32_BIT_SUPPORT
+ *	bit 2: APM_IDLE_SLOWS_CLOCK
+ *	bit 3: APM_BIOS_DISABLED
+ *	bit 4: APM_BIOS_DISENGAGED
+ *   3) AC line status
+ *	0x00: Off-line
+ *	0x01: On-line
+ *	0x02: On backup power (BIOS >= 1.1 only)
+ *	0xff: Unknown
+ *   4) Battery status
+ *	0x00: High
+ *	0x01: Low
+ *	0x02: Critical
+ *	0x03: Charging
+ *	0x04: Selected battery not present (BIOS >= 1.2 only)
+ *	0xff: Unknown
+ *   5) Battery flag
+ *	bit 0: High
+ *	bit 1: Low
+ *	bit 2: Critical
+ *	bit 3: Charging
+ *	bit 7: No system battery
+ *	0xff: Unknown
+ *   6) Remaining battery life (percentage of charge):
+ *	0-100: valid
+ *	-1: Unknown
+ *   7) Remaining battery life (time units):
+ *	Number of remaining minutes or seconds
+ *	-1: Unknown
+ *   8) min = minutes; sec = seconds
+ */
+static int apm_get_info(char *buf, char **start, off_t fpos, int length)
+{
+	struct apm_power_info info;
+	char *units;
+	int ret;
+
+	info.ac_line_status = 0xff;
+	info.battery_status = 0xff;
+	info.battery_flag   = 0xff;
+	info.battery_life   = -1;
+	info.time	    = -1;
+	info.units	    = -1;
+
+	if (apm_get_power_status)
+		apm_get_power_status(&info);
+
+	switch (info.units) {
+	default:	units = "?";	break;
+	case 0: 	units = "min";	break;
+	case 1: 	units = "sec";	break;
+	}
+
+	ret = sprintf(buf, "%s 1.2 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
+		     driver_version, APM_32_BIT_SUPPORT,
+		     info.ac_line_status, info.battery_status,
+		     info.battery_flag, info.battery_life,
+		     info.time, units);
+
+	return ret;
+}
+#endif
+
+/* kapmd - kernel thread that drains kapmd_queue and acts on each APM event. */
+static int kapmd(void *arg)
+{
+	do {
+		apm_event_t event = 0;
+		int ret;
+
+		wait_event_interruptible(kapmd_wait,
+				!queue_empty(&kapmd_queue) || kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		spin_lock_irq(&kapmd_queue_lock);
+		if (!queue_empty(&kapmd_queue))
+			event = queue_get_event(&kapmd_queue);
+		spin_unlock_irq(&kapmd_queue_lock);
+
+		switch (event) {
+		case 0:
+			break;
+
+		case APM_LOW_BATTERY:
+		case APM_POWER_STATUS_CHANGE:
+			queue_event(event);
+			break;
+
+		case APM_USER_SUSPEND:
+		case APM_SYS_SUSPEND:
+			ret = queue_suspend_event(event, NULL);
+			if (ret < 0) {
+				/* We were busy.  Requeue under the queue lock,
+				 * as apm_queue_event() does; retry in 50ms. */
+				spin_lock_irq(&kapmd_queue_lock);
+				queue_add_event(&kapmd_queue, event);
+				spin_unlock_irq(&kapmd_queue_lock);
+				msleep(50);
+			} else if (ret > 0)
+				apm_suspend();
+			break;
+
+		case APM_CRITICAL_SUSPEND:
+			apm_suspend();
+			break;
+		}
+	} while (1);
+
+	return 0;
+}
+
+static int __init apm_init(void)
+{
+	int ret;
+
+	if (apm_disabled) {
+		printk(KERN_NOTICE "apm: disabled on user request.\n");
+		return -ENODEV;
+	}
+
+	kapmd_tsk = kthread_create(kapmd, NULL, "kapmd");
+	if (IS_ERR(kapmd_tsk)) {
+		ret = PTR_ERR(kapmd_tsk);
+		kapmd_tsk = NULL;
+		return ret;
+	}
+	kapmd_tsk->flags |= PF_NOFREEZE;
+	wake_up_process(kapmd_tsk);
+
+#ifdef CONFIG_PROC_FS
+	create_proc_info_entry("apm", 0, NULL, apm_get_info);
+#endif
+
+	ret = misc_register(&apm_device);
+	if (ret != 0) {
+		remove_proc_entry("apm", NULL);
+		kthread_stop(kapmd_tsk);
+	}
+
+	return ret;
+}
+
+static void __exit apm_exit(void)
+{
+	misc_deregister(&apm_device);
+	remove_proc_entry("apm", NULL);
+
+	kthread_stop(kapmd_tsk);
+}
+
+module_init(apm_init);
+module_exit(apm_exit);
+
+MODULE_AUTHOR("Stephen Rothwell");
+MODULE_DESCRIPTION("Advanced Power Management");
+MODULE_LICENSE("GPL");
+
+#ifndef MODULE
+static int __init apm_setup(char *str)
+{
+	while ((str != NULL) && (*str != '\0')) {
+		if (strncmp(str, "off", 3) == 0)
+			apm_disabled = 1;
+		if (strncmp(str, "on", 2) == 0)
+			apm_disabled = 0;
+		str = strchr(str, ',');
+		if (str != NULL)
+			str += strspn(str, ", \t");
+	}
+	return 1;
+}
+
+__setup("apm=", apm_setup);
+#endif
+
+/**
+ * apm_queue_event - queue an APM event for kapmd
+ * @event: APM event
+ *
+ * Queue an APM event for kapmd to process and ultimately take the
+ * appropriate action.  Only a subset of events are handled:
+ *   %APM_LOW_BATTERY
+ *   %APM_POWER_STATUS_CHANGE
+ *   %APM_USER_SUSPEND
+ *   %APM_SYS_SUSPEND
+ *   %APM_CRITICAL_SUSPEND
+ */
+void apm_queue_event(apm_event_t event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kapmd_queue_lock, flags);
+	queue_add_event(&kapmd_queue, event);
+	spin_unlock_irqrestore(&kapmd_queue_lock, flags);
+
+	wake_up_interruptible(&kapmd_wait);
+}
+EXPORT_SYMBOL(apm_queue_event);
diff --git a/include/linux/apm-emulation.h b/include/linux/apm-emulation.h
new file mode 100644
index 00000000000..e6d800358dd
--- /dev/null
+++ b/include/linux/apm-emulation.h
@@ -0,0 +1,62 @@
+/* -*- linux-c -*-
+ *
+ * (C) 2003 zecke@handhelds.org
+ *
+ * GPL version 2
+ *
+ * based on arch/arm/kernel/apm.c
+ * factor out the information needed by architectures to provide
+ * apm status
+ */
+#ifndef __LINUX_APM_EMULATION_H
+#define __LINUX_APM_EMULATION_H
+
+#include <linux/apm_bios.h>
+
+/*
+ * This structure gets filled in by the machine specific 'get_power_status'
+ * implementation.  Any fields which are not set default to a safe value.
+ */
+struct apm_power_info {
+	unsigned char	ac_line_status;
+#define APM_AC_OFFLINE			0
+#define APM_AC_ONLINE			1
+#define APM_AC_BACKUP			2
+#define APM_AC_UNKNOWN			0xff
+
+	unsigned char	battery_status;
+#define APM_BATTERY_STATUS_HIGH		0
+#define APM_BATTERY_STATUS_LOW		1
+#define APM_BATTERY_STATUS_CRITICAL	2
+#define APM_BATTERY_STATUS_CHARGING	3
+#define APM_BATTERY_STATUS_NOT_PRESENT	4
+#define APM_BATTERY_STATUS_UNKNOWN	0xff
+
+	unsigned char	battery_flag;
+#define APM_BATTERY_FLAG_HIGH		(1 << 0)
+#define APM_BATTERY_FLAG_LOW		(1 << 1)
+#define APM_BATTERY_FLAG_CRITICAL	(1 << 2)
+#define APM_BATTERY_FLAG_CHARGING	(1 << 3)
+#define APM_BATTERY_FLAG_NOT_PRESENT	(1 << 7)
+#define APM_BATTERY_FLAG_UNKNOWN	0xff
+
+	int		battery_life;
+	int		time;
+	int		units;
+#define APM_UNITS_MINS			0
+#define APM_UNITS_SECS			1
+#define APM_UNITS_UNKNOWN		-1
+
+};
+
+/*
+ * This allows machines to provide their own "apm get power status" function.
+ */
+extern void (*apm_get_power_status)(struct apm_power_info *);
+
+/*
+ * Queue an event (APM_SYS_SUSPEND or APM_CRITICAL_SUSPEND)
+ */
+void apm_queue_event(apm_event_t event);
+
+#endif /* __LINUX_APM_EMULATION_H */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ed296225dcd..95f6657fff7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -131,3 +131,29 @@ config SUSPEND_SMP
 	bool
 	depends on HOTPLUG_CPU && X86 && PM
 	default y
+
+config APM_EMULATION
+	tristate "Advanced Power Management Emulation"
+	depends on PM && SYS_SUPPORTS_APM_EMULATION
+	help
+	  APM is a BIOS specification for saving power using several different
+	  techniques. This is mostly useful for battery powered laptops with
+	  APM compliant BIOSes. If you say Y here, the system time will be
+	  reset after a RESUME operation, the /proc/apm device will provide
+	  battery status information, and user-space programs will receive
+	  notification of APM "events" (e.g. battery status change).
+
+	  In order to use APM, you will need supporting software. For location
+	  and more information, read <file:Documentation/pm.txt> and the
+	  Battery Powered Linux mini-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  This driver does not spin down disk drives (see the hdparm(8)
+	  manpage ("man 8 hdparm") for that), and it doesn't turn off
+	  VESA-compliant "green" monitors.
+
+	  Generally, if you don't have a battery in your machine, there isn't
+	  much point in using this driver and you should say N. If you get
+	  random kernel OOPSes or reboots that don't seem to be related to
+	  anything, try disabling/enabling this option (or disabling/enabling
+	  APM in your BIOS).
-- 
cgit v1.2.3-70-g09d2


From 9ac7849e35f705830f7b016ff272b0ff1f7ff759 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sat, 20 Jan 2007 16:00:26 +0900
Subject: devres: device resource management

Implement device resource management, in short, devres.  A device
driver can allocate arbitrary size of devres data which is associated
with a release function.  On driver detach, release function is
invoked on the devres data, then, devres data is freed.

devreses are typed by associated release functions.  Some devreses are
better represented by single instance of the type while others need
multiple instances sharing the same release function.  Both usages are
supported.

devreses can be grouped using devres group such that a device driver
can easily release acquired resources halfway through initialization
or selectively release resources (e.g. resources for port 1 out of 4
ports).

This patch adds devres core including documentation and the following
managed interfaces.

* alloc/free	: devm_kzalloc(), devm_kfree()
* IO region	: devm_request_region(), devm_release_region()
* IRQ		: devm_request_irq(), devm_free_irq()
* DMA		: dmam_alloc_coherent(), dmam_free_coherent(),
		  dmam_declare_coherent_memory(), dmam_pool_create(),
		  dmam_pool_destroy()
* PCI		: pcim_enable_device(), pcim_pin_device(), pci_is_managed()
* iomap		: devm_ioport_map(), devm_ioport_unmap(), devm_ioremap(),
		  devm_ioremap_nocache(), devm_iounmap(), pcim_iomap_table(),
		  pcim_iomap(), pcim_iounmap()

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 Documentation/driver-model/devres.txt | 268 ++++++++++++++
 drivers/base/Kconfig                  |  12 +
 drivers/base/Makefile                 |   1 +
 drivers/base/base.h                   |   1 +
 drivers/base/core.c                   |   2 +
 drivers/base/dd.c                     |   3 +
 drivers/base/devres.c                 | 644 ++++++++++++++++++++++++++++++++++
 drivers/base/dma-mapping.c            | 218 ++++++++++++
 drivers/base/dmapool.c                |  59 ++++
 drivers/pci/pci.c                     | 127 ++++++-
 include/linux/device.h                |  38 ++
 include/linux/dma-mapping.h           |  29 +-
 include/linux/dmapool.h               |   7 +
 include/linux/interrupt.h             |   6 +
 include/linux/io.h                    |  17 +
 include/linux/ioport.h                |  20 ++
 include/linux/pci.h                   |   9 +
 kernel/irq/manage.c                   |  86 +++++
 kernel/resource.c                     |  62 ++++
 lib/Makefile                          |   3 +-
 lib/iomap.c                           | 246 ++++++++++++-
 21 files changed, 1853 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/driver-model/devres.txt
 create mode 100644 drivers/base/devres.c
 create mode 100644 drivers/base/dma-mapping.c

(limited to 'kernel')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
new file mode 100644
index 00000000000..5163b85308f
--- /dev/null
+++ b/Documentation/driver-model/devres.txt
@@ -0,0 +1,268 @@
+Devres - Managed Device Resource
+================================
+
+Tejun Heo	<teheo@suse.de>
+
+First draft	10 January 2007
+
+
+1. Intro			: Huh? Devres?
+2. Devres			: Devres in a nutshell
+3. Devres Group			: Group devres'es and release them together
+4. Details			: Life time rules, calling context, ...
+5. Overhead			: How much do we have to pay for this?
+6. List of managed interfaces	: Currently implemented managed interfaces
+
+
+  1. Intro
+  --------
+
+devres came up while trying to convert libata to use iomap.  Each
+iomapped address should be kept and unmapped on driver detach.  For
+example, a plain SFF ATA controller (that is, good old PCI IDE) in
+native mode makes use of 5 PCI BARs and all of them should be
+maintained.
+
+As with many other device drivers, libata low level drivers have
+sufficient bugs in ->remove and ->probe failure path.  Well, yes,
+that's probably because libata low level driver developers are a lazy
+bunch, but aren't all low level driver developers?  After spending a
+day fiddling with braindamaged hardware with no document or
+braindamaged document, if it's finally working, well, it's working.
+
+For one reason or another, low level drivers don't receive as much
+attention or testing as core code, and bugs on driver detach or
+initialization failure don't happen often enough to be noticeable.
+The init failure path is worse because it's much less travelled while
+it needs to handle multiple entry points.
+
+So, many low level drivers end up leaking resources on driver detach
+and having half broken failure path implementation in ->probe() which
+would leak resources or even cause oops when failure occurs.  iomap
+adds more to this mix.  So do msi and msix.
+
+
+  2. Devres
+  ---------
+
+devres is basically a linked list of arbitrarily sized memory areas
+associated with a struct device.  Each devres entry is associated with
+a release function.  A devres can be released in several ways.  No
+matter what, all devres entries are released on driver detach.  On
+release, the associated release function is invoked and then the
+devres entry is freed.
+
+Managed interface is created for resources commonly used by device
+drivers using devres.  For example, coherent DMA memory is acquired
+using dma_alloc_coherent().  The managed version is called
+dmam_alloc_coherent().  It is identical to dma_alloc_coherent() except
+that the DMA memory allocated using it is managed and will be
+automatically released on driver detach.  Implementation looks like
+the following.
+
+  struct dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+  };
+
+  static void dmam_coherent_release(struct device *dev, void *res)
+  {
+	struct dma_devres *this = res;
+
+	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
+  }
+
+  dmam_alloc_coherent(dev, size, dma_handle, gfp)
+  {
+	struct dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+	...
+
+	/* alloc DMA memory as usual */
+	vaddr = dma_alloc_coherent(...);
+	...
+
+	/* record size, vaddr, dma_handle in dr */
+	dr->vaddr = vaddr;
+	...
+
+	devres_add(dev, dr);
+
+	return vaddr;
+  }
+
+If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
+freed whether initialization fails half-way or the device gets
+detached.  If most resources are acquired using managed interface, a
+driver can have much simpler init and exit code.  Init path basically
+looks like the following.
+
+  my_init_one()
+  {
+	struct mydev *d;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->ring = dmam_alloc_coherent(...);
+	if (!d->ring)
+		return -ENOMEM;
+
+	if (check something)
+		return -EINVAL;
+	...
+
+	return register_to_upper_layer(d);
+  }
+
+And exit path,
+
+  my_remove_one()
+  {
+	unregister_from_upper_layer(d);
+	shutdown_my_hardware();
+  }
+
+As shown above, low level drivers can be simplified a lot by using
+devres.  Complexity is shifted from less maintained low level drivers
+to better maintained higher layer.  Also, as init failure path is
+shared with exit path, both can get more testing.
+
+
+  3. Devres group
+  ---------------
+
+Devres entries can be grouped using devres group.  When a group is
+released, all contained normal devres entries and properly nested
+groups are released.  One usage is to rollback series of acquired
+resources on failure.  For example,
+
+  if (!devres_open_group(dev, NULL, GFP_KERNEL))
+	return -ENOMEM;
+
+  acquire A;
+  if (failed)
+	goto err;
+
+  acquire B;
+  if (failed)
+	goto err;
+  ...
+
+  devres_remove_group(dev, NULL);
+  return 0;
+
+ err:
+  devres_release_group(dev, NULL);
+  return err_code;
+
+As resource acquisition failure usually means probe failure, constructs
+like above are usually useful in midlayer drivers (e.g. libata core
+layer) where interface functions shouldn't have side effects on failure.
+For LLDs, just returning error code suffices in most cases.
+
+Each group is identified by void *id.  It can either be explicitly
+specified by @id argument to devres_open_group() or automatically
+created by passing NULL as @id as in the above example.  In both
+cases, devres_open_group() returns the group's id.  The returned id
+can be passed to other devres functions to select the target group.
+If NULL is given to those functions, the latest open group is
+selected.
+
+For example, you can do something like the following.
+
+  int my_midlayer_create_something()
+  {
+	if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
+		return -ENOMEM;
+
+	...
+
+	devres_close_group(dev, my_midlayer_create_something);
+	return 0;
+  }
+
+  void my_midlayer_destroy_something()
+  {
+	devres_release_group(dev, my_midlayer_create_something);
+  }
+
+
+  4. Details
+  ----------
+
+Lifetime of a devres entry begins on devres allocation and finishes
+when it is released or destroyed (removed and freed) - no reference
+counting.
+
+devres core guarantees atomicity to all basic devres operations and
+has support for single-instance devres types (atomic
+lookup-and-add-if-not-found).  Other than that, synchronizing
+concurrent accesses to allocated devres data is caller's
+responsibility.  This is usually non-issue because bus ops and
+resource allocations already do the job.
+
+For an example of single-instance devres type, read pcim_iomap_table()
+in lib/iomap.c.
+
+All devres interface functions can be called without context if the
+right gfp mask is given.
+
+
+  5. Overhead
+  -----------
+
+Each devres bookkeeping info is allocated together with requested data
+area.  With debug option turned off, bookkeeping info occupies 16
+bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
+up to ull alignment).  If singly linked list is used, it can be
+reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
+
+Each devres group occupies 8 pointers.  It can be reduced to 6 if
+singly linked list is used.
+
+Memory space overhead on ahci controller with two ports is between 300
+and 400 bytes on 32bit machine after naive conversion (we can
+certainly invest a bit more effort into libata core layer).
+
+
+  6. List of managed interfaces
+  -----------------------------
+
+IO region
+  devm_request_region()
+  devm_request_mem_region()
+  devm_release_region()
+  devm_release_mem_region()
+
+IRQ
+  devm_request_irq()
+  devm_free_irq()
+
+DMA
+  dmam_alloc_coherent()
+  dmam_free_coherent()
+  dmam_alloc_noncoherent()
+  dmam_free_noncoherent()
+  dmam_declare_coherent_memory()
+  dmam_pool_create()
+  dmam_pool_destroy()
+
+PCI
+  pcim_enable_device()	: after success, all PCI ops become managed
+  pcim_pin_device()	: keep PCI device enabled after release
+
+IOMAP
+  devm_ioport_map()
+  devm_ioport_unmap()
+  devm_ioremap()
+  devm_ioremap_nocache()
+  devm_iounmap()
+  pcim_iomap()
+  pcim_iounmap()
+  pcim_iomap_table()	: array of mapped addresses indexed by BAR
+  pcim_iomap_regions()	: do request_region() and iomap() on multiple BARs
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 1429f3a2629..5d6312e3349 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -37,6 +37,18 @@ config DEBUG_DRIVER
 
 	  If you are unsure about this, say N here.
 
+config DEBUG_DEVRES
+	bool "Managed device resources verbose debug messages"
+	depends on DEBUG_KERNEL
+	help
+	  This option enables kernel parameter devres.log. If set to
+	  non-zero, devres debug messages are printed. Select this if
+	  you are having a problem with devres or want to debug
+	  resource management for a managed device. devres.log can be
+	  switched on and off from sysfs node.
+
+	  If you are unsure about this, say N here.
+
 config SYS_HYPERVISOR
 	bool
 	default n
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 7bbb9eeda23..e9eb7382ac3 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -3,6 +3,7 @@
 obj-y			:= core.o sys.o bus.o dd.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o dmapool.o \
+			   dma-mapping.o devres.o \
 			   attribute_container.o transport_class.o
 obj-y			+= power/
 obj-$(CONFIG_ISA)	+= isa.o
diff --git a/drivers/base/base.h b/drivers/base/base.h
index d26644a5953..de7e1442ce6 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -44,3 +44,4 @@ struct class_device_attribute *to_class_dev_attr(struct attribute *_attr)
 
 extern char *make_class_name(const char *name, struct kobject *kobj);
 
+extern void devres_release_all(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index e13614241c9..a8ac34ba610 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -428,6 +428,8 @@ void device_initialize(struct device *dev)
 	INIT_LIST_HEAD(&dev->dma_pools);
 	INIT_LIST_HEAD(&dev->node);
 	init_MUTEX(&dev->sem);
+	spin_lock_init(&dev->devres_lock);
+	INIT_LIST_HEAD(&dev->devres_head);
 	device_init_wakeup(dev, 0);
 	set_dev_node(dev, -1);
 }
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index b5bf243d9cd..6a48824e43f 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -112,6 +112,7 @@ static int really_probe(void *void_data)
 	atomic_inc(&probe_count);
 	pr_debug("%s: Probing driver %s with device %s\n",
 		 drv->bus->name, drv->name, dev->bus_id);
+	WARN_ON(!list_empty(&dev->devres_head));
 
 	dev->driver = drv;
 	if (driver_sysfs_add(dev)) {
@@ -137,6 +138,7 @@ static int really_probe(void *void_data)
 	goto done;
 
 probe_failed:
+	devres_release_all(dev);
 	driver_sysfs_remove(dev);
 	dev->driver = NULL;
 
@@ -327,6 +329,7 @@ static void __device_release_driver(struct device * dev)
 			dev->bus->remove(dev);
 		else if (drv->remove)
 			drv->remove(dev);
+		devres_release_all(dev);
 		dev->driver = NULL;
 		put_driver(drv);
 	}
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
new file mode 100644
index 00000000000..e177c9533b6
--- /dev/null
+++ b/drivers/base/devres.c
@@ -0,0 +1,644 @@
+/*
+ * drivers/base/devres.c - device resource management
+ *
+ * Copyright (c) 2006  SUSE Linux Products GmbH
+ * Copyright (c) 2006  Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+
+struct devres_node {
+	struct list_head		entry;
+	dr_release_t			release;
+#ifdef CONFIG_DEBUG_DEVRES
+	const char			*name;
+	size_t				size;
+#endif
+};
+
+struct devres {
+	struct devres_node		node;
+	/* -- 3 pointers */
+	unsigned long long		data[];	/* guarantee ull alignment */
+};
+
+struct devres_group {
+	struct devres_node		node[2];
+	void				*id;
+	int				color;
+	/* -- 8 pointers */
+};
+
+#ifdef CONFIG_DEBUG_DEVRES
+static int log_devres = 0;
+module_param_named(log, log_devres, int, S_IRUGO | S_IWUSR);
+
+static void set_node_dbginfo(struct devres_node *node, const char *name,
+			     size_t size)
+{
+	node->name = name;
+	node->size = size;
+}
+
+static void devres_log(struct device *dev, struct devres_node *node,
+		       const char *op)
+{
+	if (unlikely(log_devres))
+		dev_printk(KERN_ERR, dev, "DEVRES %3s %p %s (%lu bytes)\n",
+			   op, node, node->name, (unsigned long)node->size);
+}
+#else /* CONFIG_DEBUG_DEVRES */
+#define set_node_dbginfo(node, n, s)	do {} while (0)
+#define devres_log(dev, node, op)	do {} while (0)
+#endif /* CONFIG_DEBUG_DEVRES */
+
+/*
+ * Release functions for devres group.  These callbacks are used only
+ * for identification.
+ */
+static void group_open_release(struct device *dev, void *res)
+{
+	/* noop */
+}
+
+static void group_close_release(struct device *dev, void *res)
+{
+	/* noop */
+}
+
+static struct devres_group * node_to_group(struct devres_node *node)
+{
+	if (node->release == &group_open_release)
+		return container_of(node, struct devres_group, node[0]);
+	if (node->release == &group_close_release)
+		return container_of(node, struct devres_group, node[1]);
+	return NULL;
+}
+
+static __always_inline struct devres * alloc_dr(dr_release_t release,
+						size_t size, gfp_t gfp)
+{
+	size_t tot_size = sizeof(struct devres) + size;
+	struct devres *dr;
+
+	dr = kmalloc_track_caller(tot_size, gfp);
+	if (unlikely(!dr))
+		return NULL;
+
+	memset(dr, 0, tot_size);
+	INIT_LIST_HEAD(&dr->node.entry);
+	dr->node.release = release;
+	return dr;
+}
+
+static void add_dr(struct device *dev, struct devres_node *node)
+{
+	devres_log(dev, node, "ADD");
+	BUG_ON(!list_empty(&node->entry));
+	list_add_tail(&node->entry, &dev->devres_head);
+}
+
+/**
+ * devres_alloc - Allocate device resource data
+ * @release: Release function devres will be associated with
+ * @size: Allocation size
+ * @gfp: Allocation flags
+ *
+ * Allocate devres of @size bytes.  The allocated area is zeroed, then
+ * associated with @release.  The returned pointer can be passed to
+ * other devres_*() functions.
+ *
+ * RETURNS:
+ * Pointer to allocated devres on success, NULL on failure.
+ */
+#ifdef CONFIG_DEBUG_DEVRES
+void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
+		      const char *name)
+{
+	struct devres *dr;
+
+	dr = alloc_dr(release, size, gfp);
+	if (unlikely(!dr))
+		return NULL;
+	set_node_dbginfo(&dr->node, name, size);
+	return dr->data;
+}
+EXPORT_SYMBOL_GPL(__devres_alloc);
+#else
+void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
+{
+	struct devres *dr;
+
+	dr = alloc_dr(release, size, gfp);
+	if (unlikely(!dr))
+		return NULL;
+	return dr->data;
+}
+EXPORT_SYMBOL_GPL(devres_alloc);
+#endif
+
+/**
+ * devres_free - Free device resource data
+ * @res: Pointer to devres data to free
+ *
+ * Free devres created with devres_alloc().
+ */
+void devres_free(void *res)
+{
+	if (res) {
+		struct devres *dr = container_of(res, struct devres, data);
+
+		BUG_ON(!list_empty(&dr->node.entry));
+		kfree(dr);
+	}
+}
+EXPORT_SYMBOL_GPL(devres_free);
+
+/**
+ * devres_add - Register device resource
+ * @dev: Device to add resource to
+ * @res: Resource to register
+ *
+ * Register devres @res to @dev.  @res should have been allocated
+ * using devres_alloc().  On driver detach, the associated release
+ * function will be invoked and devres will be freed automatically.
+ */
+void devres_add(struct device *dev, void *res)
+{
+	struct devres *dr = container_of(res, struct devres, data);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	add_dr(dev, &dr->node);
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+}
+EXPORT_SYMBOL_GPL(devres_add);
+
+static struct devres *find_dr(struct device *dev, dr_release_t release,
+			      dr_match_t match, void *match_data)
+{
+	struct devres_node *node;
+
+	list_for_each_entry_reverse(node, &dev->devres_head, entry) {
+		struct devres *dr = container_of(node, struct devres, node);
+
+		if (node->release != release)
+			continue;
+		if (match && !match(dev, dr->data, match_data))
+			continue;
+		return dr;
+	}
+
+	return NULL;
+}
+
+/**
+ * devres_find - Find device resource
+ * @dev: Device to lookup resource from
+ * @release: Look for resources associated with this release function
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ *
+ * Find the latest devres of @dev which is associated with @release
+ * and for which @match returns 1.  If @match is NULL, it's considered
+ * to match all.
+ *
+ * RETURNS:
+ * Pointer to found devres, NULL if not found.
+ */
+void * devres_find(struct device *dev, dr_release_t release,
+		   dr_match_t match, void *match_data)
+{
+	struct devres *dr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	dr = find_dr(dev, release, match, match_data);
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+
+	if (dr)
+		return dr->data;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(devres_find);
+
+/**
+ * devres_get - Find devres, if non-existent, add one atomically
+ * @dev: Device to lookup or add devres for
+ * @new_res: Pointer to new initialized devres to add if not found
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ *
+ * Find the latest devres of @dev which has the same release function
+ * as @new_res and for which @match returns 1.  If found, @new_res is
+ * freed; otherwise, @new_res is added atomically.
+ *
+ * RETURNS:
+ * Pointer to found or added devres.
+ */
+void * devres_get(struct device *dev, void *new_res,
+		  dr_match_t match, void *match_data)
+{
+	struct devres *new_dr = container_of(new_res, struct devres, data);
+	struct devres *dr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	dr = find_dr(dev, new_dr->node.release, match, match_data);
+	if (!dr) {
+		add_dr(dev, &new_dr->node);
+		dr = new_dr;
+		new_dr = NULL;
+	}
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+	devres_free(new_dr);
+
+	return dr->data;
+}
+EXPORT_SYMBOL_GPL(devres_get);
+
+/**
+ * devres_remove - Find a device resource and remove it
+ * @dev: Device to find resource from
+ * @release: Look for resources associated with this release function
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ *
+ * Find the latest devres of @dev associated with @release and for
+ * which @match returns 1.  If @match is NULL, it's considered to
+ * match all.  If found, the resource is removed atomically and
+ * returned.
+ *
+ * RETURNS:
+ * Pointer to removed devres on success, NULL if not found.
+ */
+void * devres_remove(struct device *dev, dr_release_t release,
+		     dr_match_t match, void *match_data)
+{
+	struct devres *dr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	dr = find_dr(dev, release, match, match_data);
+	if (dr) {
+		list_del_init(&dr->node.entry);
+		devres_log(dev, &dr->node, "REM");
+	}
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+
+	if (dr)
+		return dr->data;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(devres_remove);
+
+/**
+ * devres_destroy - Find a device resource and destroy it
+ * @dev: Device to find resource from
+ * @release: Look for resources associated with this release function
+ * @match: Match function (optional)
+ * @match_data: Data for the match function
+ *
+ * Find the latest devres of @dev associated with @release and for
+ * which @match returns 1.  If @match is NULL, it's considered to
+ * match all.  If found, the resource is removed atomically and freed.
+ *
+ * RETURNS:
+ * 0 if devres is found and freed, -ENOENT if not found.
+ */
+int devres_destroy(struct device *dev, dr_release_t release,
+		   dr_match_t match, void *match_data)
+{
+	void *res;
+
+	res = devres_remove(dev, release, match, match_data);
+	if (unlikely(!res))
+		return -ENOENT;
+
+	devres_free(res);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devres_destroy);
+
+static int remove_nodes(struct device *dev,
+			struct list_head *first, struct list_head *end,
+			struct list_head *todo)
+{
+	int cnt = 0, nr_groups = 0;
+	struct list_head *cur;
+
+	/* First pass - move normal devres entries to @todo and clear
+	 * devres_group colors.
+	 */
+	cur = first;
+	while (cur != end) {
+		struct devres_node *node;
+		struct devres_group *grp;
+
+		node = list_entry(cur, struct devres_node, entry);
+		cur = cur->next;
+
+		grp = node_to_group(node);
+		if (grp) {
+			/* clear color of group markers in the first pass */
+			grp->color = 0;
+			nr_groups++;
+		} else {
+			/* regular devres entry */
+			if (&node->entry == first)
+				first = first->next;
+			list_move_tail(&node->entry, todo);
+			cnt++;
+		}
+	}
+
+	if (!nr_groups)
+		return cnt;
+
+	/* Second pass - Scan groups and color them.  A group gets
+	 * color value of two iff the group is wholly contained in
+	 * [cur, end).  That is, for a closed group, both opening and
+	 * closing markers should be in the range, while just the
+	 * opening marker is enough for an open group.
+	 */
+	cur = first;
+	while (cur != end) {
+		struct devres_node *node;
+		struct devres_group *grp;
+
+		node = list_entry(cur, struct devres_node, entry);
+		cur = cur->next;
+
+		grp = node_to_group(node);
+		BUG_ON(!grp || list_empty(&grp->node[0].entry));
+
+		grp->color++;
+		if (list_empty(&grp->node[1].entry))
+			grp->color++;
+
+		BUG_ON(grp->color <= 0 || grp->color > 2);
+		if (grp->color == 2) {
+			/* No need to update cur or end.  The removed
+			 * nodes are always before both.
+			 */
+			list_move_tail(&grp->node[0].entry, todo);
+			list_del_init(&grp->node[1].entry);
+		}
+	}
+
+	return cnt;
+}
+
+static int release_nodes(struct device *dev, struct list_head *first,
+			 struct list_head *end, unsigned long flags)
+{
+	LIST_HEAD(todo);
+	int cnt;
+	struct devres *dr, *tmp;
+
+	cnt = remove_nodes(dev, first, end, &todo);
+
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+
+	/* Release.  Note that both devres and devres_group are
+	 * handled as devres in the following loop.  This is safe.
+	 */
+	list_for_each_entry_safe_reverse(dr, tmp, &todo, node.entry) {
+		devres_log(dev, &dr->node, "REL");
+		dr->node.release(dev, dr->data);
+		kfree(dr);
+	}
+
+	return cnt;
+}
+
+/**
+ * devres_release_all - Release all resources
+ * @dev: Device to release resources for
+ *
+ * Release all resources associated with @dev.  This function is
+ * called on driver detach.
+ */
+int devres_release_all(struct device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	return release_nodes(dev, dev->devres_head.next, &dev->devres_head,
+			     flags);
+}
+
+/**
+ * devres_open_group - Open a new devres group
+ * @dev: Device to open devres group for
+ * @id: Separator ID
+ * @gfp: Allocation flags
+ *
+ * Open a new devres group for @dev with @id.  For @id, using a
+ * pointer to an object which won't be used for another group is
+ * recommended.  If @id is NULL, address-wise unique ID is created.
+ *
+ * RETURNS:
+ * ID of the new group, NULL on failure.
+ */
+void * devres_open_group(struct device *dev, void *id, gfp_t gfp)
+{
+	struct devres_group *grp;
+	unsigned long flags;
+
+	grp = kmalloc(sizeof(*grp), gfp);
+	if (unlikely(!grp))
+		return NULL;
+
+	grp->node[0].release = &group_open_release;
+	grp->node[1].release = &group_close_release;
+	INIT_LIST_HEAD(&grp->node[0].entry);
+	INIT_LIST_HEAD(&grp->node[1].entry);
+	set_node_dbginfo(&grp->node[0], "grp<", 0);
+	set_node_dbginfo(&grp->node[1], "grp>", 0);
+	grp->id = grp;
+	if (id)
+		grp->id = id;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+	add_dr(dev, &grp->node[0]);
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+	return grp->id;
+}
+EXPORT_SYMBOL_GPL(devres_open_group);
+
+/* Find devres group with ID @id.  If @id is NULL, look for the latest. */
+static struct devres_group * find_group(struct device *dev, void *id)
+{
+	struct devres_node *node;
+
+	list_for_each_entry_reverse(node, &dev->devres_head, entry) {
+		struct devres_group *grp;
+
+		if (node->release != &group_open_release)
+			continue;
+
+		grp = container_of(node, struct devres_group, node[0]);
+
+		if (id) {
+			if (grp->id == id)
+				return grp;
+		} else if (list_empty(&grp->node[1].entry))
+			return grp;
+	}
+
+	return NULL;
+}
+
+/**
+ * devres_close_group - Close a devres group
+ * @dev: Device to close devres group for
+ * @id: ID of target group, can be NULL
+ *
+ * Close the group identified by @id.  If @id is NULL, the latest open
+ * group is selected.
+ */
+void devres_close_group(struct device *dev, void *id)
+{
+	struct devres_group *grp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+
+	grp = find_group(dev, id);
+	if (grp)
+		add_dr(dev, &grp->node[1]);
+	else
+		WARN_ON(1);
+
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+}
+EXPORT_SYMBOL_GPL(devres_close_group);
+
+/**
+ * devres_remove_group - Remove a devres group
+ * @dev: Device to remove group for
+ * @id: ID of target group, can be NULL
+ *
+ * Remove the group identified by @id.  If @id is NULL, the latest
+ * open group is selected.  Note that removing a group doesn't affect
+ * any other resources.
+ */
+void devres_remove_group(struct device *dev, void *id)
+{
+	struct devres_group *grp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+
+	grp = find_group(dev, id);
+	if (grp) {
+		list_del_init(&grp->node[0].entry);
+		list_del_init(&grp->node[1].entry);
+		devres_log(dev, &grp->node[0], "REM");
+	} else
+		WARN_ON(1);
+
+	spin_unlock_irqrestore(&dev->devres_lock, flags);
+
+	kfree(grp);
+}
+EXPORT_SYMBOL_GPL(devres_remove_group);
+
+/**
+ * devres_release_group - Release resources in a devres group
+ * @dev: Device to release group for
+ * @id: ID of target group, can be NULL
+ *
+ * Release all resources in the group identified by @id.  If @id is
+ * NULL, the latest open group is selected.  The selected group and
+ * groups properly nested inside the selected group are removed.
+ *
+ * RETURNS:
+ * The number of released non-group resources.
+ */
+int devres_release_group(struct device *dev, void *id)
+{
+	struct devres_group *grp;
+	unsigned long flags;
+	int cnt = 0;
+
+	spin_lock_irqsave(&dev->devres_lock, flags);
+
+	grp = find_group(dev, id);
+	if (grp) {
+		struct list_head *first = &grp->node[0].entry;
+		struct list_head *end = &dev->devres_head;
+
+		if (!list_empty(&grp->node[1].entry))
+			end = grp->node[1].entry.next;
+
+		cnt = release_nodes(dev, first, end, flags);
+	} else {
+		WARN_ON(1);
+		spin_unlock_irqrestore(&dev->devres_lock, flags);
+	}
+
+	return cnt;
+}
+EXPORT_SYMBOL_GPL(devres_release_group);
+
+/*
+ * Managed kzalloc/kfree
+ */
+static void devm_kzalloc_release(struct device *dev, void *res)
+{
+	/* noop */
+}
+
+static int devm_kzalloc_match(struct device *dev, void *res, void *data)
+{
+	return res == data;
+}
+
+/**
+ * devm_kzalloc - Managed kzalloc
+ * @dev: Device to allocate memory for
+ * @size: Allocation size
+ * @gfp: Allocation gfp flags
+ *
+ * Managed kzalloc.  Memory allocated with this function is
+ * automatically freed on driver detach.  Like all other devres
+ * resources, guaranteed alignment is unsigned long long.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void * devm_kzalloc(struct device *dev, size_t size, gfp_t gfp)
+{
+	struct devres *dr;
+
+	/* use raw alloc_dr for kmalloc caller tracing */
+	dr = alloc_dr(devm_kzalloc_release, size, gfp);
+	if (unlikely(!dr))
+		return NULL;
+
+	set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
+	devres_add(dev, dr->data);
+	return dr->data;
+}
+EXPORT_SYMBOL_GPL(devm_kzalloc);
+
+/**
+ * devm_kfree - Managed kfree
+ * @dev: Device this memory belongs to
+ * @p: Memory to free
+ *
+ * Free memory allocated with devm_kzalloc().
+ */
+void devm_kfree(struct device *dev, void *p)
+{
+	int rc;
+
+	rc = devres_destroy(dev, devm_kzalloc_release, devm_kzalloc_match, p);
+	WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_kfree);
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
new file mode 100644
index 00000000000..ca9186f70a6
--- /dev/null
+++ b/drivers/base/dma-mapping.c
@@ -0,0 +1,218 @@
+/*
+ * drivers/base/dma-mapping.c - arch-independent dma-mapping routines
+ *
+ * Copyright (c) 2006  SUSE Linux Products GmbH
+ * Copyright (c) 2006  Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/dma-mapping.h>
+
+/*
+ * Managed DMA API
+ */
+struct dma_devres {
+	size_t		size;
+	void		*vaddr;
+	dma_addr_t	dma_handle;
+};
+
+static void dmam_coherent_release(struct device *dev, void *res)
+{
+	struct dma_devres *this = res;
+
+	dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
+}
+
+static void dmam_noncoherent_release(struct device *dev, void *res)
+{
+	struct dma_devres *this = res;
+
+	dma_free_noncoherent(dev, this->size, this->vaddr, this->dma_handle);
+}
+
+static int dmam_match(struct device *dev, void *res, void *match_data)
+{
+	struct dma_devres *this = res, *match = match_data;
+
+	if (this->vaddr == match->vaddr) {
+		WARN_ON(this->size != match->size ||
+			this->dma_handle != match->dma_handle);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * dmam_alloc_coherent - Managed dma_alloc_coherent()
+ * @dev: Device to allocate coherent memory for
+ * @size: Size of allocation
+ * @dma_handle: Out argument for allocated DMA handle
+ * @gfp: Allocation flags
+ *
+ * Managed dma_alloc_coherent().  Memory allocated using this function
+ * will be automatically released on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void * dmam_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t gfp)
+{
+	struct dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+	if (!dr)
+		return NULL;
+
+	vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp);
+	if (!vaddr) {
+		devres_free(dr);
+		return NULL;
+	}
+
+	dr->vaddr = vaddr;
+	dr->dma_handle = *dma_handle;
+	dr->size = size;
+
+	devres_add(dev, dr);
+
+	return vaddr;
+}
+EXPORT_SYMBOL(dmam_alloc_coherent);
+
+/**
+ * dmam_free_coherent - Managed dma_free_coherent()
+ * @dev: Device to free coherent memory for
+ * @size: Size of allocation
+ * @vaddr: Virtual address of the memory to free
+ * @dma_handle: DMA handle of the memory to free
+ *
+ * Managed dma_free_coherent().
+ */
+void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
+			dma_addr_t dma_handle)
+{
+	struct dma_devres match_data = { size, vaddr, dma_handle };
+
+	dma_free_coherent(dev, size, vaddr, dma_handle);
+	WARN_ON(devres_destroy(dev, dmam_coherent_release, dmam_match,
+			       &match_data));
+}
+EXPORT_SYMBOL(dmam_free_coherent);
+
+/**
+ * dmam_alloc_noncoherent - Managed dma_alloc_noncoherent()
+ * @dev: Device to allocate noncoherent memory for
+ * @size: Size of allocation
+ * @dma_handle: Out argument for allocated DMA handle
+ * @gfp: Allocation flags
+ *
+ * Managed dma_alloc_noncoherent().  Memory allocated using this
+ * function will be automatically released on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void *dmam_alloc_noncoherent(struct device *dev, size_t size,
+			     dma_addr_t *dma_handle, gfp_t gfp)
+{
+	struct dma_devres *dr;
+	void *vaddr;
+
+	dr = devres_alloc(dmam_noncoherent_release, sizeof(*dr), gfp);
+	if (!dr)
+		return NULL;
+
+	vaddr = dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+	if (!vaddr) {
+		devres_free(dr);
+		return NULL;
+	}
+
+	dr->vaddr = vaddr;
+	dr->dma_handle = *dma_handle;
+	dr->size = size;
+
+	devres_add(dev, dr);
+
+	return vaddr;
+}
+EXPORT_SYMBOL(dmam_alloc_noncoherent);
+
+/**
+ * dmam_free_noncoherent - Managed dma_free_noncoherent()
+ * @dev: Device to free noncoherent memory for
+ * @size: Size of allocation
+ * @vaddr: Virtual address of the memory to free
+ * @dma_handle: DMA handle of the memory to free
+ *
+ * Managed dma_free_noncoherent().
+ */
+void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+			   dma_addr_t dma_handle)
+{
+	struct dma_devres match_data = { size, vaddr, dma_handle };
+
+	dma_free_noncoherent(dev, size, vaddr, dma_handle);
+	WARN_ON(devres_destroy(dev, dmam_noncoherent_release, dmam_match,
+			       &match_data));
+}
+EXPORT_SYMBOL(dmam_free_noncoherent);
+
+#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
+
+static void dmam_coherent_decl_release(struct device *dev, void *res)
+{
+	dma_release_declared_memory(dev);
+}
+
+/**
+ * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
+ * @dev: Device to declare coherent memory for
+ * @bus_addr: Bus address of coherent memory to be declared
+ * @device_addr: Device address of coherent memory to be declared
+ * @size: Size of coherent memory to be declared
+ * @flags: Flags
+ *
+ * Managed dma_declare_coherent_memory().
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+				 dma_addr_t device_addr, size_t size, int flags)
+{
+	void *res;
+	int rc;
+
+	res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	rc = dma_declare_coherent_memory(dev, bus_addr, device_addr, size,
+					 flags);
+	if (rc == 0)
+		devres_add(dev, res);
+	else
+		devres_free(res);
+
+	return rc;
+}
+EXPORT_SYMBOL(dmam_declare_coherent_memory);
+
+/**
+ * dmam_release_declared_memory - Managed dma_release_declared_memory().
+ * @dev: Device to release declared coherent memory for
+ *
+ * Managed dma_release_declared_memory().
+ */
+void dmam_release_declared_memory(struct device *dev)
+{
+	WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL));
+}
+EXPORT_SYMBOL(dmam_release_declared_memory);
+
+#endif
diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c
index f95d5027727..cd467c9f33b 100644
--- a/drivers/base/dmapool.c
+++ b/drivers/base/dmapool.c
@@ -415,8 +415,67 @@ dma_pool_free (struct dma_pool *pool, void *vaddr, dma_addr_t dma)
 	spin_unlock_irqrestore (&pool->lock, flags);
 }
 
+/*
+ * Managed DMA pool
+ */
+static void dmam_pool_release(struct device *dev, void *res)
+{
+	struct dma_pool *pool = *(struct dma_pool **)res;
+
+	dma_pool_destroy(pool);
+}
+
+static int dmam_pool_match(struct device *dev, void *res, void *match_data)
+{
+	return *(struct dma_pool **)res == match_data;
+}
+
+/**
+ * dmam_pool_create - Managed dma_pool_create()
+ * @name: name of pool, for diagnostics
+ * @dev: device that will be doing the DMA
+ * @size: size of the blocks in this pool.
+ * @align: alignment requirement for blocks; must be a power of two
+ * @allocation: returned blocks won't cross this boundary (or zero)
+ *
+ * Managed dma_pool_create().  DMA pool created with this function is
+ * automatically destroyed on driver detach.
+ */
+struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
+				  size_t size, size_t align, size_t allocation)
+{
+	struct dma_pool **ptr, *pool;
+
+	ptr = devres_alloc(dmam_pool_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	pool = *ptr = dma_pool_create(name, dev, size, align, allocation);
+	if (pool)
+		devres_add(dev, ptr);
+	else
+		devres_free(ptr);
+
+	return pool;
+}
+
+/**
+ * dmam_pool_destroy - Managed dma_pool_destroy()
+ * @pool: dma pool that will be destroyed
+ *
+ * Managed dma_pool_destroy().
+ */
+void dmam_pool_destroy(struct dma_pool *pool)
+{
+	struct device *dev = pool->dev;
+
+	dma_pool_destroy(pool);
+	WARN_ON(devres_destroy(dev, dmam_pool_release, dmam_pool_match, pool));
+}
 
 EXPORT_SYMBOL (dma_pool_create);
 EXPORT_SYMBOL (dma_pool_destroy);
 EXPORT_SYMBOL (dma_pool_alloc);
 EXPORT_SYMBOL (dma_pool_free);
+EXPORT_SYMBOL (dmam_pool_create);
+EXPORT_SYMBOL (dmam_pool_destroy);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 84c757ba066..8b44cff2c17 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -744,6 +744,104 @@ int pci_enable_device(struct pci_dev *dev)
 	return pci_enable_device_bars(dev, (1 << PCI_NUM_RESOURCES) - 1);
 }
 
+/*
+ * Managed PCI resources.  This manages device on/off, intx/msi/msix
+ * on/off and BAR regions.  pci_dev itself records msi/msix status, so
+ * there's no need to track it separately.  pci_devres is initialized
+ * when a device is enabled using managed PCI device enable interface.
+ */
+struct pci_devres {
+	unsigned int disable:1;
+	unsigned int orig_intx:1;
+	unsigned int restore_intx:1;
+	u32 region_mask;
+};
+
+static void pcim_release(struct device *gendev, void *res)
+{
+	struct pci_dev *dev = container_of(gendev, struct pci_dev, dev);
+	struct pci_devres *this = res;
+	int i;
+
+	if (dev->msi_enabled)
+		pci_disable_msi(dev);
+	if (dev->msix_enabled)
+		pci_disable_msix(dev);
+
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
+		if (this->region_mask & (1 << i))
+			pci_release_region(dev, i);
+
+	if (this->restore_intx)
+		pci_intx(dev, this->orig_intx);
+
+	if (this->disable)
+		pci_disable_device(dev);
+}
+
+static struct pci_devres * get_pci_dr(struct pci_dev *pdev)
+{
+	struct pci_devres *dr, *new_dr;
+
+	dr = devres_find(&pdev->dev, pcim_release, NULL, NULL);
+	if (dr)
+		return dr;
+
+	new_dr = devres_alloc(pcim_release, sizeof(*new_dr), GFP_KERNEL);
+	if (!new_dr)
+		return NULL;
+	return devres_get(&pdev->dev, new_dr, NULL, NULL);
+}
+
+static struct pci_devres * find_pci_dr(struct pci_dev *pdev)
+{
+	if (pci_is_managed(pdev))
+		return devres_find(&pdev->dev, pcim_release, NULL, NULL);
+	return NULL;
+}
+
+/**
+ * pcim_enable_device - Managed pci_enable_device()
+ * @pdev: PCI device to be initialized
+ *
+ * Managed pci_enable_device().
+ */
+int pcim_enable_device(struct pci_dev *pdev)
+{
+	struct pci_devres *dr;
+	int rc;
+
+	dr = get_pci_dr(pdev);
+	if (unlikely(!dr))
+		return -ENOMEM;
+	WARN_ON(!!dr->disable);
+
+	rc = pci_enable_device(pdev);
+	if (!rc) {
+		pdev->is_managed = 1;
+		dr->disable = 1;
+	}
+	return rc;
+}
+
+/**
+ * pcim_pin_device - Pin managed PCI device
+ * @pdev: PCI device to pin
+ *
+ * Pin managed PCI device @pdev.  Pinned device won't be disabled on
+ * driver detach.  @pdev must have been enabled with
+ * pcim_enable_device().
+ */
+void pcim_pin_device(struct pci_dev *pdev)
+{
+	struct pci_devres *dr;
+
+	dr = find_pci_dr(pdev);
+	WARN_ON(!dr || !dr->disable);
+	if (dr)
+		dr->disable = 0;
+}
+
 /**
  * pcibios_disable_device - disable arch specific PCI resources for device dev
  * @dev: the PCI device to disable
@@ -767,8 +865,13 @@ void __attribute__ ((weak)) pcibios_disable_device (struct pci_dev *dev) {}
 void
 pci_disable_device(struct pci_dev *dev)
 {
+	struct pci_devres *dr;
 	u16 pci_command;
 
+	dr = find_pci_dr(dev);
+	if (dr)
+		dr->disable = 0;
+
 	if (atomic_sub_return(1, &dev->enable_cnt) != 0)
 		return;
 
@@ -867,6 +970,8 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
  */
 void pci_release_region(struct pci_dev *pdev, int bar)
 {
+	struct pci_devres *dr;
+
 	if (pci_resource_len(pdev, bar) == 0)
 		return;
 	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
@@ -875,6 +980,10 @@ void pci_release_region(struct pci_dev *pdev, int bar)
 	else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
 		release_mem_region(pci_resource_start(pdev, bar),
 				pci_resource_len(pdev, bar));
+
+	dr = find_pci_dr(pdev);
+	if (dr)
+		dr->region_mask &= ~(1 << bar);
 }
 
 /**
@@ -893,6 +1002,8 @@ void pci_release_region(struct pci_dev *pdev, int bar)
  */
 int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
 {
+	struct pci_devres *dr;
+
 	if (pci_resource_len(pdev, bar) == 0)
 		return 0;
 		
@@ -906,7 +1017,11 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
 				        pci_resource_len(pdev, bar), res_name))
 			goto err_out;
 	}
-	
+
+	dr = find_pci_dr(pdev);
+	if (dr)
+		dr->region_mask |= 1 << bar;
+
 	return 0;
 
 err_out:
@@ -1144,7 +1259,15 @@ pci_intx(struct pci_dev *pdev, int enable)
 	}
 
 	if (new != pci_command) {
+		struct pci_devres *dr;
+
 		pci_write_config_word(pdev, PCI_COMMAND, new);
+
+		dr = find_pci_dr(pdev);
+		if (dr && !dr->restore_intx) {
+			dr->restore_intx = 1;
+			dr->orig_intx = !enable;
+		}
 	}
 }
 
@@ -1226,6 +1349,8 @@ device_initcall(pci_init);
 EXPORT_SYMBOL_GPL(pci_restore_bars);
 EXPORT_SYMBOL(pci_enable_device_bars);
 EXPORT_SYMBOL(pci_enable_device);
+EXPORT_SYMBOL(pcim_enable_device);
+EXPORT_SYMBOL(pcim_pin_device);
 EXPORT_SYMBOL(pci_disable_device);
 EXPORT_SYMBOL(pci_find_capability);
 EXPORT_SYMBOL(pci_bus_find_capability);
diff --git a/include/linux/device.h b/include/linux/device.h
index 5ca1cdba563..26e4692f2d1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -354,6 +354,41 @@ extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
 				   struct bin_attribute *attr);
+
+/* device resource management */
+typedef void (*dr_release_t)(struct device *dev, void *res);
+typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data);
+
+#ifdef CONFIG_DEBUG_DEVRES
+extern void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
+			     const char *name);
+#define devres_alloc(release, size, gfp) \
+	__devres_alloc(release, size, gfp, #release)
+#else
+extern void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp);
+#endif
+extern void devres_free(void *res);
+extern void devres_add(struct device *dev, void *res);
+extern void * devres_find(struct device *dev, dr_release_t release,
+			  dr_match_t match, void *match_data);
+extern void * devres_get(struct device *dev, void *new_res,
+			 dr_match_t match, void *match_data);
+extern void * devres_remove(struct device *dev, dr_release_t release,
+			    dr_match_t match, void *match_data);
+extern int devres_destroy(struct device *dev, dr_release_t release,
+			  dr_match_t match, void *match_data);
+
+/* devres group */
+extern void * __must_check devres_open_group(struct device *dev, void *id,
+					     gfp_t gfp);
+extern void devres_close_group(struct device *dev, void *id);
+extern void devres_remove_group(struct device *dev, void *id);
+extern int devres_release_group(struct device *dev, void *id);
+
+/* managed kzalloc/kfree for device drivers, no kmalloc, always use kzalloc */
+extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
+extern void devm_kfree(struct device *dev, void *p);
+
 struct device {
 	struct klist		klist_children;
 	struct klist_node	knode_parent;		/* node in sibling list */
@@ -397,6 +432,9 @@ struct device {
 	/* arch specific additions */
 	struct dev_archdata	archdata;
 
+	spinlock_t		devres_lock;
+	struct list_head	devres_head;
+
 	/* class_device migration path */
 	struct list_head	node;
 	struct class		*class;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ff203c465fe..9a663c6db16 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -66,6 +66,33 @@ dma_mark_declared_memory_occupied(struct device *dev,
 }
 #endif
 
-#endif
+/*
+ * Managed DMA API
+ */
+extern void *dmam_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t gfp);
+extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
+			       dma_addr_t dma_handle);
+extern void *dmam_alloc_noncoherent(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t gfp);
+extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+				  dma_addr_t dma_handle);
+#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
+extern int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+					dma_addr_t device_addr, size_t size,
+					int flags);
+extern void dmam_release_declared_memory(struct device *dev);
+#else /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
+static inline int dmam_declare_coherent_memory(struct device *dev,
+				dma_addr_t bus_addr, dma_addr_t device_addr,
+				size_t size, int flags)
+{
+	return 0;
+}
 
+static inline void dmam_release_declared_memory(struct device *dev)
+{
+}
+#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
 
+#endif
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index 76f12f46db7..022e34fcbd1 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -24,5 +24,12 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 
 void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr);
 
+/*
+ * Managed DMA pool
+ */
+struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
+				  size_t size, size_t align, size_t allocation);
+void dmam_pool_destroy(struct dma_pool *pool);
+
 #endif
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e36e86c869f..5a8ba0b8ccb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/irqflags.h>
 #include <linux/bottom_half.h>
+#include <linux/device.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
@@ -83,6 +84,11 @@ extern int request_irq(unsigned int, irq_handler_t handler,
 		       unsigned long, const char *, void *);
 extern void free_irq(unsigned int, void *);
 
+extern int devm_request_irq(struct device *dev, unsigned int irq,
+			    irq_handler_t handler, unsigned long irqflags,
+			    const char *devname, void *dev_id);
+extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
+
 /*
  * On lockdep we dont want to enable hardirqs in hardirq
  * context. Use local_irq_enable_in_hardirq() to annotate
diff --git a/include/linux/io.h b/include/linux/io.h
index 81877ea3930..f5edf9c5de0 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -28,6 +28,23 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
 int ioremap_page_range(unsigned long addr, unsigned long end,
 		       unsigned long phys_addr, pgprot_t prot);
 
+/*
+ * Managed iomap interface
+ */
+void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
+			       unsigned int nr);
+void devm_ioport_unmap(struct device *dev, void __iomem *addr);
+
+void __iomem * devm_ioremap(struct device *dev, unsigned long offset,
+			    unsigned long size);
+void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset,
+				    unsigned long size);
+void devm_iounmap(struct device *dev, void __iomem *addr);
+
+void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
+void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
+void __iomem * const * pcim_iomap_table(struct pci_dev *pdev);
+
 /**
  *	check_signature		-	find BIOS signatures
  *	@io_addr: mmio address to check
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 15228d79c5b..6859a3b1408 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -137,4 +137,24 @@ static inline int __deprecated check_region(resource_size_t s,
 {
 	return __check_region(&ioport_resource, s, n);
 }
+
+/* Wrappers for managed devices */
+struct device;
+#define devm_request_region(dev,start,n,name) \
+	__devm_request_region(dev, &ioport_resource, (start), (n), (name))
+#define devm_request_mem_region(dev,start,n,name) \
+	__devm_request_region(dev, &iomem_resource, (start), (n), (name))
+
+extern struct resource * __devm_request_region(struct device *dev,
+				struct resource *parent, resource_size_t start,
+				resource_size_t n, const char *name);
+
+#define devm_release_region(dev,start,n) \
+	__devm_release_region(dev, &ioport_resource, (start), (n))
+#define devm_release_mem_region(dev,start,n) \
+	__devm_release_region(dev, &iomem_resource, (start), (n))
+
+extern void __devm_release_region(struct device *dev, struct resource *parent,
+				  resource_size_t start, resource_size_t n);
+
 #endif	/* _LINUX_IOPORT_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 805412cc687..9e3042e7e1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -167,6 +167,7 @@ struct pci_dev {
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
+	unsigned int	is_managed:1;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
 	u32		saved_config_space[16]; /* config space saved at suspend time */
@@ -528,6 +529,14 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val
 
 int __must_check pci_enable_device(struct pci_dev *dev);
 int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask);
+int __must_check pcim_enable_device(struct pci_dev *pdev);
+void pcim_pin_device(struct pci_dev *pdev);
+
+static inline int pci_is_managed(struct pci_dev *pdev)
+{
+	return pdev->is_managed;
+}
+
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8b961adc3bd..c4b7ed1cebf 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -482,3 +482,89 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
+
+/*
+ * Device resource management aware IRQ request/free implementation.
+ */
+struct irq_devres {
+	unsigned int irq;
+	void *dev_id;
+};
+
+static void devm_irq_release(struct device *dev, void *res)
+{
+	struct irq_devres *this = res;
+
+	free_irq(this->irq, this->dev_id);
+}
+
+static int devm_irq_match(struct device *dev, void *res, void *data)
+{
+	struct irq_devres *this = res, *match = data;
+
+	return this->irq == match->irq && this->dev_id == match->dev_id;
+}
+
+/**
+ *	devm_request_irq - allocate an interrupt line for a managed device
+ *	@dev: device to request interrupt for
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	Except for the extra @dev argument, this function takes the
+ *	same arguments and performs the same function as
+ *	request_irq().  IRQs requested with this function will be
+ *	automatically freed on driver detach.
+ *
+ *	If an IRQ allocated with this function needs to be freed
+ *	separately, devm_free_irq() must be used.
+ */
+int devm_request_irq(struct device *dev, unsigned int irq,
+		     irq_handler_t handler, unsigned long irqflags,
+		     const char *devname, void *dev_id)
+{
+	struct irq_devres *dr;
+	int rc;
+
+	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
+			  GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	rc = request_irq(irq, handler, irqflags, devname, dev_id);
+	if (rc) {
+		devres_free(dr);	/* not kfree(): dr is from devres_alloc() */
+		return rc;
+	}
+
+	dr->irq = irq;
+	dr->dev_id = dev_id;
+	devres_add(dev, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_request_irq);
+
+/**
+ *	devm_free_irq - free an interrupt
+ *	@dev: device to free interrupt for
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Except for the extra @dev argument, this function takes the
+ *	same arguments and performs the same function as free_irq().
+ *	This function instead of free_irq() should be used to manually
+ *	free IRQs allocated with devm_request_irq().
+ */
+void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
+{
+	struct irq_devres match_data = { irq, dev_id };
+
+	free_irq(irq, dev_id);
+	WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
+			       &match_data));
+}
+EXPORT_SYMBOL(devm_free_irq);
diff --git a/kernel/resource.c b/kernel/resource.c
index 7b9a497419d..2a3f8863658 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/device.h>
 #include <asm/io.h>
 
 
@@ -617,6 +618,67 @@ void __release_region(struct resource *parent, resource_size_t start,
 }
 EXPORT_SYMBOL(__release_region);
 
+/*
+ * Managed region resource
+ */
+struct region_devres {
+	struct resource *parent;
+	resource_size_t start;
+	resource_size_t n;
+};
+
+static void devm_region_release(struct device *dev, void *res)
+{
+	struct region_devres *this = res;
+
+	__release_region(this->parent, this->start, this->n);
+}
+
+static int devm_region_match(struct device *dev, void *res, void *match_data)
+{
+	struct region_devres *this = res, *match = match_data;
+
+	return this->parent == match->parent &&
+		this->start == match->start && this->n == match->n;
+}
+
+struct resource * __devm_request_region(struct device *dev,
+				struct resource *parent, resource_size_t start,
+				resource_size_t n, const char *name)
+{
+	struct region_devres *dr = NULL;
+	struct resource *res;
+
+	dr = devres_alloc(devm_region_release, sizeof(struct region_devres),
+			  GFP_KERNEL);
+	if (!dr)
+		return NULL;
+
+	dr->parent = parent;
+	dr->start = start;
+	dr->n = n;
+
+	res = __request_region(parent, start, n, name);
+	if (res)
+		devres_add(dev, dr);
+	else
+		devres_free(dr);
+
+	return res;
+}
+EXPORT_SYMBOL(__devm_request_region);
+
+void __devm_release_region(struct device *dev, struct resource *parent,
+			   resource_size_t start, resource_size_t n)
+{
+	struct region_devres match_data = { parent, start, n };
+
+	__release_region(parent, start, n);
+	WARN_ON(devres_destroy(dev, devm_region_release, devm_region_match,
+			       &match_data));
+}
+EXPORT_SYMBOL(__devm_release_region);
+
 /*
  * Called from init/main.c to reserve IO ports.
  */
diff --git a/lib/Makefile b/lib/Makefile
index 77b4bad7d44..29b2e9912bb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,7 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
 	 idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-	 sha1.o irq_regs.o reciprocal_div.o
+	 sha1.o irq_regs.o reciprocal_div.o iomap.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -41,7 +41,6 @@ obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC32)	+= crc32.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
-obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
diff --git a/lib/iomap.c b/lib/iomap.c
index d6ccdd85df5..3214028141d 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -4,8 +4,10 @@
  * (C) Copyright 2004 Linus Torvalds
  */
 #include <linux/pci.h>
+#include <linux/io.h>
+
+#ifdef CONFIG_GENERIC_IOMAP
 #include <linux/module.h>
-#include <asm/io.h>
 
 /*
  * Read/write from/to an (offsettable) iomem cookie. It might be a PIO
@@ -254,3 +256,245 @@ void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 }
 EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
+
+#endif /* CONFIG_GENERIC_IOMAP */
+
+/*
+ * Generic iomap devres
+ */
+static void devm_ioport_map_release(struct device *dev, void *res)
+{
+	ioport_unmap(*(void __iomem **)res);
+}
+
+static int devm_ioport_map_match(struct device *dev, void *res,
+				 void *match_data)
+{
+	return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioport_map - Managed ioport_map()
+ * @dev: Generic device to map ioport for
+ * @port: Port to map
+ * @nr: Number of ports to map
+ *
+ * Managed ioport_map().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
+			       unsigned int nr)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioport_map(port, nr);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioport_map);
+
+/**
+ * devm_ioport_unmap - Managed ioport_unmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed ioport_unmap().  @addr must have been mapped using
+ * devm_ioport_map().
+ */
+void devm_ioport_unmap(struct device *dev, void __iomem *addr)
+{
+	ioport_unmap(addr);
+	WARN_ON(devres_destroy(dev, devm_ioport_map_release,
+			       devm_ioport_map_match, (void *)addr));
+}
+EXPORT_SYMBOL(devm_ioport_unmap);
+
+static void devm_ioremap_release(struct device *dev, void *res)
+{
+	iounmap(*(void __iomem **)res);
+}
+
+static int devm_ioremap_match(struct device *dev, void *res, void *match_data)
+{
+	return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioremap - Managed ioremap()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap().  Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap(struct device *dev, unsigned long offset,
+			   unsigned long size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap);
+
+/**
+ * devm_ioremap_nocache - Managed ioremap_nocache()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_nocache().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_nocache(struct device *dev, unsigned long offset,
+				   unsigned long size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_nocache(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_nocache);
+
+/**
+ * devm_iounmap - Managed iounmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed iounmap().  @addr must have been mapped using devm_ioremap*().
+ */
+void devm_iounmap(struct device *dev, void __iomem *addr)
+{
+	iounmap(addr);
+	WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match,
+			       (void *)addr));
+}
+EXPORT_SYMBOL(devm_iounmap);
+
+/*
+ * PCI iomap devres
+ */
+#define PCIM_IOMAP_MAX	PCI_ROM_RESOURCE
+
+struct pcim_iomap_devres {
+	void __iomem *table[PCIM_IOMAP_MAX];
+};
+
+static void pcim_iomap_release(struct device *gendev, void *res)
+{
+	struct pci_dev *dev = container_of(gendev, struct pci_dev, dev);
+	struct pcim_iomap_devres *this = res;
+	int i;
+
+	for (i = 0; i < PCIM_IOMAP_MAX; i++)
+		if (this->table[i])
+			pci_iounmap(dev, this->table[i]);
+}
+
+/**
+ * pcim_iomap_table - access iomap allocation table
+ * @pdev: PCI device to access iomap table for
+ *
+ * Access iomap allocation table for @pdev.  If iomap table doesn't
+ * exist and @pdev is managed, it will be allocated.  All iomaps
+ * recorded in the iomap table are automatically unmapped on driver
+ * detach.
+ *
+ * This function might sleep when the table is first allocated but can
+ * be safely called without context and guaranteed to succeed once
+ * allocated.
+ */
+void __iomem * const * pcim_iomap_table(struct pci_dev *pdev)
+{
+	struct pcim_iomap_devres *dr, *new_dr;
+
+	dr = devres_find(&pdev->dev, pcim_iomap_release, NULL, NULL);
+	if (dr)
+		return dr->table;
+
+	new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL);
+	if (!new_dr)
+		return NULL;
+	dr = devres_get(&pdev->dev, new_dr, NULL, NULL);
+	return dr->table;
+}
+EXPORT_SYMBOL(pcim_iomap_table);
+
+/**
+ * pcim_iomap - Managed pci_iomap()
+ * @pdev: PCI device to iomap for
+ * @bar: BAR to iomap
+ * @maxlen: Maximum length of iomap
+ *
+ * Managed pci_iomap().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+{
+	void __iomem **tbl;
+
+	BUG_ON(bar >= PCIM_IOMAP_MAX);
+
+	tbl = (void __iomem **)pcim_iomap_table(pdev);
+	if (!tbl || tbl[bar])	/* duplicate mappings not allowed */
+		return NULL;
+
+	tbl[bar] = pci_iomap(pdev, bar, maxlen);
+	return tbl[bar];
+}
+EXPORT_SYMBOL(pcim_iomap);
+
+/**
+ * pcim_iounmap - Managed pci_iounmap()
+ * @pdev: PCI device to iounmap for
+ * @addr: Address to unmap
+ *
+ * Managed pci_iounmap().  @addr must have been mapped using pcim_iomap().
+ */
+void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+	void __iomem **tbl;
+	int i;
+
+	pci_iounmap(pdev, addr);
+
+	tbl = (void __iomem **)pcim_iomap_table(pdev);
+	BUG_ON(!tbl);
+
+	for (i = 0; i < PCIM_IOMAP_MAX; i++)
+		if (tbl[i] == addr) {
+			tbl[i] = NULL;
+			return;
+		}
+	WARN_ON(1);
+}
+EXPORT_SYMBOL(pcim_iounmap);
-- 
cgit v1.2.3-70-g09d2


From d23ad42324cc4378132e51f2fc5c9ba6cbe75182 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sat, 10 Feb 2007 01:43:02 -0800
Subject: [PATCH] Use ZVC for free_pages

This again simplifies some of the VM counter calculations through the use
of the ZVC consolidated counters.

[michal.k.k.piotrowski@gmail.com: build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h  |  2 +-
 kernel/power/snapshot.c |  4 ++--
 kernel/power/swsusp.c   |  5 +++--
 mm/highmem.c            |  3 ++-
 mm/page_alloc.c         | 37 +++++++++++++------------------------
 mm/vmstat.c             | 20 ++++----------------
 6 files changed, 25 insertions(+), 46 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9137d1b9735..824279c7884 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -47,6 +47,7 @@ struct zone_padding {
 #endif
 
 enum zone_stat_item {
+	NR_FREE_PAGES,
 	NR_INACTIVE,
 	NR_ACTIVE,
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
@@ -157,7 +158,6 @@ enum zone_type {
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
-	unsigned long		free_pages;
 	unsigned long		pages_min, pages_low, pages_high;
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c024606221c..fc53ad06812 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -591,7 +591,7 @@ static unsigned int count_free_highmem_pages(void)
 
 	for_each_zone(zone)
 		if (populated_zone(zone) && is_highmem(zone))
-			cnt += zone->free_pages;
+			cnt += zone_page_state(zone, NR_FREE_PAGES);
 
 	return cnt;
 }
@@ -869,7 +869,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
 	for_each_zone(zone) {
 		meta += snapshot_additional_pages(zone);
 		if (!is_highmem(zone))
-			free += zone->free_pages;
+			free += zone_page_state(zone, NR_FREE_PAGES);
 	}
 
 	nr_pages += count_pages_for_highmem(nr_highmem);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 31aa0390c77..7fb834397a0 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -230,9 +230,10 @@ int swsusp_shrink_memory(void)
 		for_each_zone (zone)
 			if (populated_zone(zone)) {
 				if (is_highmem(zone)) {
-					highmem_size -= zone->free_pages;
+					highmem_size -=
+					zone_page_state(zone, NR_FREE_PAGES);
 				} else {
-					tmp -= zone->free_pages;
+					tmp -= zone_page_state(zone, NR_FREE_PAGES);
 					tmp += zone->lowmem_reserve[ZONE_NORMAL];
 					tmp += snapshot_additional_pages(zone);
 				}
diff --git a/mm/highmem.c b/mm/highmem.c
index 0206e7e5018..51e1c1995fe 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void)
 	unsigned int pages = 0;
 
 	for_each_online_pgdat(pgdat)
-		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+			NR_FREE_PAGES);
 
 	return pages;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07c954e5327..ba62d8789f7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -395,7 +395,7 @@ static inline void __free_one_page(struct page *page,
 	VM_BUG_ON(page_idx & (order_size - 1));
 	VM_BUG_ON(bad_range(zone, page));
 
-	zone->free_pages += order_size;
+	__mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
 	while (order < MAX_ORDER-1) {
 		unsigned long combined_idx;
 		struct free_area *area;
@@ -631,7 +631,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 		list_del(&page->lru);
 		rmv_page_order(page);
 		area->nr_free--;
-		zone->free_pages -= 1UL << order;
+		__mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
 		expand(zone, page, order, current_order, area);
 		return page;
 	}
@@ -989,7 +989,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
 	/* free_pages my go negative - that's OK */
-	long min = mark, free_pages = z->free_pages - (1 << order) + 1;
+	long min = mark;
+	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
 	int o;
 
 	if (alloc_flags & ALLOC_HIGH)
@@ -1444,13 +1445,7 @@ EXPORT_SYMBOL(free_pages);
  */
 unsigned int nr_free_pages(void)
 {
-	unsigned int sum = 0;
-	struct zone *zone;
-
-	for_each_zone(zone)
-		sum += zone->free_pages;
-
-	return sum;
+	return global_page_state(NR_FREE_PAGES);
 }
 
 EXPORT_SYMBOL(nr_free_pages);
@@ -1458,13 +1453,7 @@ EXPORT_SYMBOL(nr_free_pages);
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
-	unsigned int sum = 0;
-	enum zone_type i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		sum += pgdat->node_zones[i].free_pages;
-
-	return sum;
+	return node_page_state(pgdat->node_id, NR_FREE_PAGES);
 }
 #endif
 
@@ -1514,7 +1503,7 @@ void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages;
 	val->sharedram = 0;
-	val->freeram = nr_free_pages();
+	val->freeram = global_page_state(NR_FREE_PAGES);
 	val->bufferram = nr_blockdev_pages();
 	val->totalhigh = totalhigh_pages;
 	val->freehigh = nr_free_highpages();
@@ -1529,10 +1518,11 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 	pg_data_t *pgdat = NODE_DATA(nid);
 
 	val->totalram = pgdat->node_present_pages;
-	val->freeram = nr_free_pages_pgdat(pgdat);
+	val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
 	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
-	val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+	val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+			NR_FREE_PAGES);
 #else
 	val->totalhigh = 0;
 	val->freehigh = 0;
@@ -1580,13 +1570,13 @@ void show_free_areas(void)
 	get_zone_counts(&active, &inactive, &free);
 
 	printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%u slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
 		active,
 		inactive,
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
-		nr_free_pages(),
+		global_page_state(NR_FREE_PAGES),
 		global_page_state(NR_SLAB_RECLAIMABLE) +
 			global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
@@ -1612,7 +1602,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone->free_pages),
+			K(zone_page_state(zone, NR_FREE_PAGES)),
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
@@ -2675,7 +2665,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		spin_lock_init(&zone->lru_lock);
 		zone_seqlock_init(zone);
 		zone->zone_pgdat = pgdat;
-		zone->free_pages = 0;
 
 		zone->prev_priority = DEF_PRIORITY;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5462106725d..2386716f175 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -16,30 +16,17 @@
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
 {
-	struct zone *zones = pgdat->node_zones;
-	int i;
-
 	*active = node_page_state(pgdat->node_id, NR_ACTIVE);
 	*inactive = node_page_state(pgdat->node_id, NR_INACTIVE);
-	*free = 0;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		*free += zones[i].free_pages;
-	}
+	*free = node_page_state(pgdat->node_id, NR_FREE_PAGES);
 }
 
 void get_zone_counts(unsigned long *active,
 		unsigned long *inactive, unsigned long *free)
 {
-	struct pglist_data *pgdat;
-
 	*active = global_page_state(NR_ACTIVE);
 	*inactive = global_page_state(NR_INACTIVE);
-	*free = 0;
-	for_each_online_pgdat(pgdat) {
-		unsigned long l, m, n;
-		__get_zone_counts(&l, &m, &n, pgdat);
-		*free += n;
-	}
+	*free = global_page_state(NR_FREE_PAGES);
 }
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
@@ -454,6 +441,7 @@ const struct seq_operations fragmentation_op = {
 
 static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
+	"nr_free_pages",
 	"nr_active",
 	"nr_inactive",
 	"nr_anon_pages",
@@ -534,7 +522,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 			   "\n        scanned  %lu (a: %lu i: %lu)"
 			   "\n        spanned  %lu"
 			   "\n        present  %lu",
-			   zone->free_pages,
+			   zone_page_state(zone, NR_FREE_PAGES),
 			   zone->pages_min,
 			   zone->pages_low,
 			   zone->pages_high,
-- 
cgit v1.2.3-70-g09d2


From 96177299416dbccb73b54e6b344260154a445375 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sat, 10 Feb 2007 01:43:03 -0800
Subject: [PATCH] Drop free_pages()

nr_free_pages is now a simple access to a global variable.  Make it a macro
instead of a function.

The nr_free_pages now requires vmstat.h to be included.  There is one
occurrence in power management where we need to add the include.  Directly
refer to global_page_state() there to clarify why the #include was added.

[akpm@osdl.org: arm build fix]
[akpm@osdl.org: sparc64 build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc64/mm/init.c |  4 ++--
 include/linux/swap.h   |  5 ++++-
 include/linux/vmstat.h |  1 +
 kernel/power/main.c    |  4 +++-
 mm/page_alloc.c        | 10 ----------
 5 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 054822a3e05..09d434f41e4 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -416,7 +416,7 @@ void show_mem(void)
 	printk("Free swap:       %6ldkB\n",
 	       nr_swap_pages << (PAGE_SHIFT-10));
 	printk("%ld pages of RAM\n", num_physpages);
-	printk("%d free pages\n", nr_free_pages());
+	printk("%lu free pages\n", nr_free_pages());
 }
 
 void mmu_info(struct seq_file *m)
@@ -1593,7 +1593,7 @@ void __init mem_init(void)
 	initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin));
 	initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT;
 
-	printk("Memory: %uk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n",
+	printk("Memory: %luk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n",
 	       nr_free_pages() << (PAGE_SHIFT-10),
 	       codepages << (PAGE_SHIFT-10),
 	       datapages << (PAGE_SHIFT-10), 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5423559a44a..817e1b47007 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -170,11 +170,14 @@ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
-extern unsigned int nr_free_pages(void);
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
+/* Definition of global_page_state not available yet */
+#define nr_free_pages() global_page_state(NR_FREE_PAGES)
+
+
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c8d55bcc09b..77caf911969 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <asm/atomic.h>
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index ff3a6182f5f..47ca5a2b653 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -20,6 +20,7 @@
 #include <linux/cpu.h>
 #include <linux/resume-trace.h>
 #include <linux/freezer.h>
+#include <linux/vmstat.h>
 
 #include "power.h"
 
@@ -72,7 +73,8 @@ static int suspend_prepare(suspend_state_t state)
 		goto Thaw;
 	}
 
-	if ((free_pages = nr_free_pages()) < FREE_PAGE_NUMBER) {
+	if ((free_pages = global_page_state(NR_FREE_PAGES))
+			< FREE_PAGE_NUMBER) {
 		pr_debug("PM: free some memory\n");
 		shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
 		if (nr_free_pages() < FREE_PAGE_NUMBER) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ba62d8789f7..f1e320b3a98 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1440,16 +1440,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
-/*
- * Total amount of free (allocatable) RAM:
- */
-unsigned int nr_free_pages(void)
-{
-	return global_page_state(NR_FREE_PAGES);
-}
-
-EXPORT_SYMBOL(nr_free_pages);
-
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
-- 
cgit v1.2.3-70-g09d2


From 6ff1b4426e3afc61dcb67299709fde9041d59265 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Sat, 10 Feb 2007 01:43:19 -0800
Subject: [PATCH] make reading /proc/sys/kernel/cap-bound not require
 CAP_SYS_MODULE

Reading /proc/sys/kernel/cap-bound requires CAP_SYS_MODULE.  (see
proc_dointvec_bset in kernel/sysctl.c)

sysctl appears to drive all over proc reading everything it can get its
hands on and is complaining when it is being denied access to read
cap-bound.  Clearly writing to cap-bound should be a sensitive operation
but requiring CAP_SYS_MODULE to read cap-bound seems a bit too strong.  I
believe the information could with reasonable certainty be obtained by
looking at a bunch of the output of /proc/pid/status which has very low
security protection, so at best we are just getting a little obfuscation of
information.

Currently SELinux policy has to 'dontaudit' capability checks for
CAP_SYS_MODULE for things like sysctl which just want to read cap-bound.
In doing so we also as a byproduct have to hide warnings of potential
exploits such as if at some time that sysctl actually tried to load a
module.  I wondered if anyone would have a problem opening cap-bound up to
read from anyone?

Acked-by: Chris Wright <chrisw@sous-sol.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 600b33358de..41bbba1a15d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1961,7 +1961,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
 {
 	int op;
 
-	if (!capable(CAP_SYS_MODULE)) {
+	if (write && !capable(CAP_SYS_MODULE)) {
 		return -EPERM;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From e3c7db621bed4afb8e231cb005057f2feb5db557 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 10 Feb 2007 01:43:31 -0800
Subject: [PATCH] PM: Change code ordering in main.c

As indicated in a recent thread on Linux-PM, it's necessary to call
pm_ops->finish() before device_resume(), but enable_nonboot_cpus() has to be
called before pm_ops->finish() (cf.
http://lists.osdl.org/pipermail/linux-pm/2006-November/004164.html).  For
consistency, it seems reasonable to call disable_nonboot_cpus() after
device_suspend().

This way the suspend code will remain symmetrical with respect to the resume
code and it may allow us to speed up things in the future by suspending and
resuming devices and/or saving the suspend image in many threads.

The following series of patches reorders the suspend and resume code so that
nonboot CPUs are disabled after devices have been suspended and enabled before
the devices are resumed.  It also causes pm_ops->finish() to be called after
enable_nonboot_cpus() wherever necessary.

This patch:

Change the ordering of code in kernel/power/main.c so that device_suspend()
is called before disable_nonboot_cpus() and pm_ops->finish() is called after
enable_nonboot_cpus() and before device_resume(), as indicated by recent
discussion on Linux-PM
(cf. http://lists.osdl.org/pipermail/linux-pm/2006-November/004164.html).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Greg KH <greg@kroah.com>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/main.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/main.c b/kernel/power/main.c
index 47ca5a2b653..e1c41312046 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -44,6 +44,11 @@ void pm_set_ops(struct pm_ops * ops)
 	mutex_unlock(&pm_mutex);
 }
 
+static inline void pm_finish(suspend_state_t state)
+{
+	if (pm_ops->finish)
+		pm_ops->finish(state);
+}
 
 /**
  *	suspend_prepare - Do prep work before entering low-power state.
@@ -64,10 +69,6 @@ static int suspend_prepare(suspend_state_t state)
 
 	pm_prepare_console();
 
-	error = disable_nonboot_cpus();
-	if (error)
-		goto Enable_cpu;
-
 	if (freeze_processes()) {
 		error = -EAGAIN;
 		goto Thaw;
@@ -90,18 +91,22 @@ static int suspend_prepare(suspend_state_t state)
 	}
 
 	suspend_console();
-	if ((error = device_suspend(PMSG_SUSPEND))) {
+	error = device_suspend(PMSG_SUSPEND);
+	if (error) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
-		goto Finish;
+		goto Resume_devices;
 	}
-	return 0;
- Finish:
-	if (pm_ops->finish)
-		pm_ops->finish(state);
+	error = disable_nonboot_cpus();
+	if (!error)
+		return 0;
+
+	enable_nonboot_cpus();
+ Resume_devices:
+	pm_finish(state);
+	device_resume();
+	resume_console();
  Thaw:
 	thaw_processes();
- Enable_cpu:
-	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
 }
@@ -136,12 +141,11 @@ int suspend_enter(suspend_state_t state)
 
 static void suspend_finish(suspend_state_t state)
 {
+	enable_nonboot_cpus();
+	pm_finish(state);
 	device_resume();
 	resume_console();
 	thaw_processes();
-	enable_nonboot_cpus();
-	if (pm_ops && pm_ops->finish)
-		pm_ops->finish(state);
 	pm_restore_console();
 }
 
-- 
cgit v1.2.3-70-g09d2


From ed746e3b18f4df18afa3763155972c5835f284c5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 10 Feb 2007 01:43:32 -0800
Subject: [PATCH] swsusp: Change code ordering in disk.c

Change the ordering of code in kernel/power/disk.c so that device_suspend() is
called before disable_nonboot_cpus() and platform_finish() is called after
enable_nonboot_cpus() and before device_resume(), as indicated by the recent
discussion on Linux-PM (cf.
http://lists.osdl.org/pipermail/linux-pm/2006-November/004164.html).

The changes here only affect the built-in swsusp.

[alexey.y.starikovskiy@linux.intel.com: fix LED blinking during image load]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Greg KH <greg@kroah.com>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: Patrick Mochel <mochel@digitalimplant.org>
Cc: Alexey Starikovskiy <alexey.y.starikovskiy@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpu.c        |   2 +
 kernel/power/disk.c | 115 ++++++++++++++++++++++++++--------------------------
 2 files changed, 60 insertions(+), 57 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7406fe6966f..3d4206ada5c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -309,6 +309,8 @@ void enable_nonboot_cpus(void)
 	mutex_lock(&cpu_add_remove_lock);
 	cpu_hotplug_disabled = 0;
 	mutex_unlock(&cpu_add_remove_lock);
+	if (cpus_empty(frozen_cpus))
+		return;
 
 	printk("Enabling non-boot CPUs ...\n");
 	for_each_cpu_mask(cpu, frozen_cpus) {
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 88fc5d7ac73..406b20adb27 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -87,52 +87,24 @@ static inline void platform_finish(void)
 	}
 }
 
+static void unprepare_processes(void)
+{
+	thaw_processes();
+	pm_restore_console();
+}
+
 static int prepare_processes(void)
 {
 	int error = 0;
 
 	pm_prepare_console();
-
-	error = disable_nonboot_cpus();
-	if (error)
-		goto enable_cpus;
-
 	if (freeze_processes()) {
 		error = -EBUSY;
-		goto thaw;
+		unprepare_processes();
 	}
-
-	if (pm_disk_mode == PM_DISK_TESTPROC) {
-		printk("swsusp debug: Waiting for 5 seconds.\n");
-		mdelay(5000);
-		goto thaw;
-	}
-
-	error = platform_prepare();
-	if (error)
-		goto thaw;
-
-	/* Free memory before shutting down devices. */
-	if (!(error = swsusp_shrink_memory()))
-		return 0;
-
-	platform_finish();
- thaw:
-	thaw_processes();
- enable_cpus:
-	enable_nonboot_cpus();
-	pm_restore_console();
 	return error;
 }
 
-static void unprepare_processes(void)
-{
-	platform_finish();
-	thaw_processes();
-	enable_nonboot_cpus();
-	pm_restore_console();
-}
-
 /**
  *	pm_suspend_disk - The granpappy of hibernation power management.
  *
@@ -150,29 +122,45 @@ int pm_suspend_disk(void)
 	if (error)
 		return error;
 
-	if (pm_disk_mode == PM_DISK_TESTPROC)
-		return 0;
+	if (pm_disk_mode == PM_DISK_TESTPROC) {
+		printk("swsusp debug: Waiting for 5 seconds.\n");
+		mdelay(5000);
+		goto Thaw;
+	}
+	/* Free memory before shutting down devices. */
+	error = swsusp_shrink_memory();
+	if (error)
+		goto Thaw;
+
+	error = platform_prepare();
+	if (error)
+		goto Thaw;
 
 	suspend_console();
 	error = device_suspend(PMSG_FREEZE);
 	if (error) {
-		resume_console();
-		printk("Some devices failed to suspend\n");
-		goto Thaw;
+		printk(KERN_ERR "PM: Some devices failed to suspend\n");
+		goto Resume_devices;
 	}
+	error = disable_nonboot_cpus();
+	if (error)
+		goto Enable_cpus;
 
 	if (pm_disk_mode == PM_DISK_TEST) {
 		printk("swsusp debug: Waiting for 5 seconds.\n");
 		mdelay(5000);
-		goto Done;
+		goto Enable_cpus;
 	}
 
 	pr_debug("PM: snapshotting memory.\n");
 	in_suspend = 1;
-	if ((error = swsusp_suspend()))
-		goto Done;
+	error = swsusp_suspend();
+	if (error)
+		goto Enable_cpus;
 
 	if (in_suspend) {
+		enable_nonboot_cpus();
+		platform_finish();
 		device_resume();
 		resume_console();
 		pr_debug("PM: writing image.\n");
@@ -188,7 +176,10 @@ int pm_suspend_disk(void)
 	}
 
 	swsusp_free();
- Done:
+ Enable_cpus:
+	enable_nonboot_cpus();
+ Resume_devices:
+	platform_finish();
 	device_resume();
 	resume_console();
  Thaw:
@@ -237,19 +228,28 @@ static int software_resume(void)
 
 	pr_debug("PM: Checking swsusp image.\n");
 
-	if ((error = swsusp_check()))
+	error = swsusp_check();
+	if (error)
 		goto Done;
 
 	pr_debug("PM: Preparing processes for restore.\n");
 
-	if ((error = prepare_processes())) {
+	error = prepare_processes();
+	if (error) {
 		swsusp_close();
 		goto Done;
 	}
 
+	error = platform_prepare();
+	if (error) {
+		swsusp_free();
+		goto Thaw;
+	}
+
 	pr_debug("PM: Reading swsusp image.\n");
 
-	if ((error = swsusp_read())) {
+	error = swsusp_read();
+	if (error) {
 		swsusp_free();
 		goto Thaw;
 	}
@@ -257,21 +257,22 @@ static int software_resume(void)
 	pr_debug("PM: Preparing devices for restore.\n");
 
 	suspend_console();
-	if ((error = device_suspend(PMSG_PRETHAW))) {
-		resume_console();
-		printk("Some devices failed to suspend\n");
-		swsusp_free();
-		goto Thaw;
-	}
+	error = device_suspend(PMSG_PRETHAW);
+	if (error)
+		goto Free;
 
-	mb();
+	error = disable_nonboot_cpus();
+	if (!error)
+		swsusp_resume();
 
-	pr_debug("PM: Restoring saved image.\n");
-	swsusp_resume();
-	pr_debug("PM: Restore failed, recovering.n");
+	enable_nonboot_cpus();
+ Free:
+	swsusp_free();
+	platform_finish();
 	device_resume();
 	resume_console();
  Thaw:
+	printk(KERN_ERR "PM: Restore failed, recovering.\n");
 	unprepare_processes();
  Done:
 	/* For success case, the suspend path will release the lock */
-- 
cgit v1.2.3-70-g09d2


From 259130526c267550bc365d3015917d90667732f1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 10 Feb 2007 01:43:33 -0800
Subject: [PATCH] swsusp: Change code ordering in user.c

Change the ordering of code in kernel/power/user.c so that device_suspend() is
called before disable_nonboot_cpus() and device_resume() is called after
enable_nonboot_cpus().  This is needed to make the userland suspend call
pm_ops->finish() after enable_nonboot_cpus() and before device_resume(), as
indicated by the recent discussion on Linux-PM (cf.
http://lists.osdl.org/pipermail/linux-pm/2006-November/004164.html).

The changes here only affect the userland interface of swsusp.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Greg KH <greg@kroah.com>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/user.c | 92 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 34 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/user.c b/kernel/power/user.c
index f7b7a785a5c..4f217683455 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -122,6 +122,59 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	return res;
 }
 
+static inline int snapshot_suspend(void)
+{
+	int error;
+
+	mutex_lock(&pm_mutex);
+	/* Free memory before shutting down devices. */
+	error = swsusp_shrink_memory();
+	if (error)
+		goto Finish;
+
+	suspend_console();
+	error = device_suspend(PMSG_FREEZE);
+	if (error)
+		goto Resume_devices;
+
+	error = disable_nonboot_cpus();
+	if (!error) {
+		in_suspend = 1;
+		error = swsusp_suspend();
+	}
+	enable_nonboot_cpus();
+ Resume_devices:
+	device_resume();
+	resume_console();
+ Finish:
+	mutex_unlock(&pm_mutex);
+	return error;
+}
+
+static inline int snapshot_restore(void)
+{
+	int error;
+
+	mutex_lock(&pm_mutex);
+	pm_prepare_console();
+	suspend_console();
+	error = device_suspend(PMSG_PRETHAW);
+	if (error)
+		goto Resume_devices;
+
+	error = disable_nonboot_cpus();
+	if (!error)
+		error = swsusp_resume();
+
+	enable_nonboot_cpus();
+ Resume_devices:
+	device_resume();
+	resume_console();
+	pm_restore_console();
+	mutex_unlock(&pm_mutex);
+	return error;
+}
+
 static int snapshot_ioctl(struct inode *inode, struct file *filp,
                           unsigned int cmd, unsigned long arg)
 {
@@ -145,14 +198,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		if (data->frozen)
 			break;
 		mutex_lock(&pm_mutex);
-		error = disable_nonboot_cpus();
-		if (!error) {
-			error = freeze_processes();
-			if (error) {
-				thaw_processes();
-				enable_nonboot_cpus();
-				error = -EBUSY;
-			}
+		if (freeze_processes()) {
+			thaw_processes();
+			error = -EBUSY;
 		}
 		mutex_unlock(&pm_mutex);
 		if (!error)
@@ -164,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			break;
 		mutex_lock(&pm_mutex);
 		thaw_processes();
-		enable_nonboot_cpus();
 		mutex_unlock(&pm_mutex);
 		data->frozen = 0;
 		break;
@@ -174,20 +221,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		mutex_lock(&pm_mutex);
-		/* Free memory before shutting down devices. */
-		error = swsusp_shrink_memory();
-		if (!error) {
-			suspend_console();
-			error = device_suspend(PMSG_FREEZE);
-			if (!error) {
-				in_suspend = 1;
-				error = swsusp_suspend();
-				device_resume();
-			}
-			resume_console();
-		}
-		mutex_unlock(&pm_mutex);
+		error = snapshot_suspend();
 		if (!error)
 			error = put_user(in_suspend, (unsigned int __user *)arg);
 		if (!error)
@@ -201,17 +235,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		mutex_lock(&pm_mutex);
-		pm_prepare_console();
-		suspend_console();
-		error = device_suspend(PMSG_PRETHAW);
-		if (!error) {
-			error = swsusp_resume();
-			device_resume();
-		}
-		resume_console();
-		pm_restore_console();
-		mutex_unlock(&pm_mutex);
+		error = snapshot_restore();
 		break;
 
 	case SNAPSHOT_FREE:
-- 
cgit v1.2.3-70-g09d2


From d12c610e08022a1b84d6bd4412c189214d32e713 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 10 Feb 2007 01:43:34 -0800
Subject: [PATCH] swsusp-change-code-ordering-in-userc-sanity

Drop the explicit "inline" from snapshot_suspend() and snapshot_restore().
The compiler will do that.  And if it doesn't, we don't want to either ;)

Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Greg KH <greg@kroah.com>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/user.c b/kernel/power/user.c
index 4f217683455..b70d83d6b16 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -122,7 +122,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	return res;
 }
 
-static inline int snapshot_suspend(void)
+static int snapshot_suspend(void)
 {
 	int error;
 
@@ -151,7 +151,7 @@ static inline int snapshot_suspend(void)
 	return error;
 }
 
-static inline int snapshot_restore(void)
+static int snapshot_restore(void)
 {
 	int error;
 
-- 
cgit v1.2.3-70-g09d2


From 2b5b09b3b576d7323d8b4244429a83f16dc5446a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 10 Feb 2007 01:43:35 -0800
Subject: [PATCH] swsusp: Change pm_ops handling by userland interface

Make the userland interface of swsusp call pm_ops->finish() after
enable_nonboot_cpus() and before device_resume(), as indicated by the recent
discussion on Linux-PM (cf.
http://lists.osdl.org/pipermail/linux-pm/2006-November/004164.html).

This patch changes the SNAPSHOT_PMOPS ioctl so that its first function,
PMOPS_PREPARE, only sets a switch turning the platform suspend mode on, and
its last function, PMOPS_FINISH, only checks if the platform mode is enabled.
This should allow the older userland tools to work with new kernels without
any modifications.

The changes here only affect the userland interface of swsusp.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Greg KH <greg@kroah.com>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/power/user.c | 71 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/user.c b/kernel/power/user.c
index b70d83d6b16..dd09efe7df5 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -37,6 +37,7 @@ static struct snapshot_data {
 	int mode;
 	char frozen;
 	char ready;
+	char platform_suspend;
 } snapshot_state;
 
 static atomic_t device_available = ATOMIC_INIT(1);
@@ -66,6 +67,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	data->bitmap = NULL;
 	data->frozen = 0;
 	data->ready = 0;
+	data->platform_suspend = 0;
 
 	return 0;
 }
@@ -122,7 +124,23 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	return res;
 }
 
-static int snapshot_suspend(void)
+static inline int platform_prepare(void)
+{
+	int error = 0;
+
+	if (pm_ops && pm_ops->prepare)
+		error = pm_ops->prepare(PM_SUSPEND_DISK);
+
+	return error;
+}
+
+static inline void platform_finish(void)
+{
+	if (pm_ops && pm_ops->finish)
+		pm_ops->finish(PM_SUSPEND_DISK);
+}
+
+static inline int snapshot_suspend(int platform_suspend)
 {
 	int error;
 
@@ -132,6 +150,11 @@ static int snapshot_suspend(void)
 	if (error)
 		goto Finish;
 
+	if (platform_suspend) {
+		error = platform_prepare();
+		if (error)
+			goto Finish;
+	}
 	suspend_console();
 	error = device_suspend(PMSG_FREEZE);
 	if (error)
@@ -144,6 +167,9 @@ static int snapshot_suspend(void)
 	}
 	enable_nonboot_cpus();
  Resume_devices:
+	if (platform_suspend)
+		platform_finish();
+
 	device_resume();
 	resume_console();
  Finish:
@@ -151,12 +177,17 @@ static int snapshot_suspend(void)
 	return error;
 }
 
-static int snapshot_restore(void)
+static inline int snapshot_restore(int platform_suspend)
 {
 	int error;
 
 	mutex_lock(&pm_mutex);
 	pm_prepare_console();
+	if (platform_suspend) {
+		error = platform_prepare();
+		if (error)
+			goto Finish;
+	}
 	suspend_console();
 	error = device_suspend(PMSG_PRETHAW);
 	if (error)
@@ -168,8 +199,12 @@ static int snapshot_restore(void)
 
 	enable_nonboot_cpus();
  Resume_devices:
+	if (platform_suspend)
+		platform_finish();
+
 	device_resume();
 	resume_console();
+ Finish:
 	pm_restore_console();
 	mutex_unlock(&pm_mutex);
 	return error;
@@ -221,7 +256,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		error = snapshot_suspend();
+		error = snapshot_suspend(data->platform_suspend);
 		if (!error)
 			error = put_user(in_suspend, (unsigned int __user *)arg);
 		if (!error)
@@ -235,7 +270,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
-		error = snapshot_restore();
+		error = snapshot_restore(data->platform_suspend);
 		break;
 
 	case SNAPSHOT_FREE:
@@ -306,6 +341,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		break;
 
 	case SNAPSHOT_S2RAM:
+		if (!pm_ops) {
+			error = -ENOSYS;
+			break;
+		}
+
 		if (!data->frozen) {
 			error = -EPERM;
 			break;
@@ -343,28 +383,35 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		break;
 
 	case SNAPSHOT_PMOPS:
+		error = -EINVAL;
+
 		switch (arg) {
 
 		case PMOPS_PREPARE:
-			if (pm_ops->prepare) {
-				error = pm_ops->prepare(PM_SUSPEND_DISK);
+			if (pm_ops && pm_ops->enter) {
+				data->platform_suspend = 1;
+				error = 0;
+			} else {
+				error = -ENOSYS;
 			}
 			break;
 
 		case PMOPS_ENTER:
-			kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
-			error = pm_ops->enter(PM_SUSPEND_DISK);
+			if (data->platform_suspend) {
+				kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
+				error = pm_ops->enter(PM_SUSPEND_DISK);
+				error = 0;
+			}
 			break;
 
 		case PMOPS_FINISH:
-			if (pm_ops && pm_ops->finish) {
-				pm_ops->finish(PM_SUSPEND_DISK);
-			}
+			if (data->platform_suspend)
+				error = 0;
+
 			break;
 
 		default:
 			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
-			error = -EINVAL;
 
 		}
 		break;
-- 
cgit v1.2.3-70-g09d2


From dc29a3657b52ac687970d81d7194cf4238702124 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sat, 10 Feb 2007 01:43:43 -0800
Subject: [PATCH] kernel/time/clocksource.c needs struct task_struct on m68k

kernel/time/clocksource.c needs struct task_struct on m68k.

Because it uses spin_unlock_irq(), which, on m68k, uses hardirq_count(), which
uses preempt_count(), which needs to dereference struct task_struct, we
have to include sched.h.  Because it would create an include loop, we
cannot include sched.h in any of asm-m68k/system.h,
linux/thread_info.h or linux/hardirq.h, which leaves this ugly include in
a C file as the only simple solution.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/time/clocksource.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 22504afc0d3..d9ef176c4e0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -28,6 +28,7 @@
 #include <linux/sysdev.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 
 /* XXX - Would like a better way for initializing curr_clocksource */
 extern struct clocksource clocksource_jiffies;
-- 
cgit v1.2.3-70-g09d2


From 3ee75ac3c0f4904633322b7d9b111566fbc4a7d3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@openvz.org>
Date: Sat, 10 Feb 2007 01:44:39 -0800
Subject: [PATCH] sysctl_{,ms_}jiffies: fix oldlen semantics

currently it's
1) if *oldlenp == 0,
	don't writeback anything

2) if *oldlenp >= table->maxlen,
	don't writeback more than table->maxlen bytes and rewrite *oldlenp
	don't look at underlying type granularity

3) if 0 < *oldlenp < table->maxlen,
		*cough*
	string sysctls don't writeback more than *oldlenp bytes.
	OK, that's because sizeof(char) == 1

	int sysctls writeback anything in (0, table->maxlen] range
	Though accept integers divisible by sizeof(int) for writing.

sysctl_jiffies and sysctl_ms_jiffies return -EINVAL for every *oldlenp other
than sizeof(int), which violates 1) and 2).

So, make sysctl_jiffies and sysctl_ms_jiffies accept
a) *oldlenp == 0, not doing writeback
b) *oldlenp >= sizeof(int), writing one integer.

-EINVAL still returned for *oldlenp == 1, 2, 3.

Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 41bbba1a15d..16ef870fa75 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2553,17 +2553,23 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
-	if (oldval) {
+	if (oldval && oldlenp) {
 		size_t olen;
-		if (oldlenp) { 
-			if (get_user(olen, oldlenp))
+
+		if (get_user(olen, oldlenp))
+			return -EFAULT;
+		if (olen) {
+			int val;
+
+			if (olen < sizeof(int))
+				return -EINVAL;
+
+			val = *(int *)(table->data) / HZ;
+			if (put_user(val, (int __user *)oldval))
+				return -EFAULT;
+			if (put_user(sizeof(int), oldlenp))
 				return -EFAULT;
-			if (olen!=sizeof(int))
-				return -EINVAL; 
 		}
-		if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
-		    (oldlenp && put_user(sizeof(int),oldlenp)))
-			return -EFAULT;
 	}
 	if (newval && newlen) { 
 		int new;
@@ -2581,17 +2587,23 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
-	if (oldval) {
+	if (oldval && oldlenp) {
 		size_t olen;
-		if (oldlenp) { 
-			if (get_user(olen, oldlenp))
+
+		if (get_user(olen, oldlenp))
+			return -EFAULT;
+		if (olen) {
+			int val;
+
+			if (olen < sizeof(int))
+				return -EINVAL;
+
+			val = jiffies_to_msecs(*(int *)(table->data));
+			if (put_user(val, (int __user *)oldval))
+				return -EFAULT;
+			if (put_user(sizeof(int), oldlenp))
 				return -EFAULT;
-			if (olen!=sizeof(int))
-				return -EINVAL; 
 		}
-		if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
-		    (oldlenp && put_user(sizeof(int),oldlenp)))
-			return -EFAULT;
 	}
 	if (newval && newlen) { 
 		int new;
-- 
cgit v1.2.3-70-g09d2


From 0c12b51712ced2c0d89a8ec3d546ed810f86d33e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sat, 10 Feb 2007 01:44:56 -0800
Subject: [PATCH] kill_pid_info: kill acquired_tasklist_lock

Kill acquired_tasklist_lock, sig_needs_tasklist() is very cheap nowadays.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 5630255d2e2..ea4632bd40a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1119,19 +1119,18 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 {
 	int error;
-	int acquired_tasklist_lock = 0;
 	struct task_struct *p;
 
 	rcu_read_lock();
-	if (unlikely(sig_needs_tasklist(sig))) {
+	if (unlikely(sig_needs_tasklist(sig)))
 		read_lock(&tasklist_lock);
-		acquired_tasklist_lock = 1;
-	}
+
 	p = pid_task(pid, PIDTYPE_PID);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
-	if (unlikely(acquired_tasklist_lock))
+
+	if (unlikely(sig_needs_tasklist(sig)))
 		read_unlock(&tasklist_lock);
 	rcu_read_unlock();
 	return error;
-- 
cgit v1.2.3-70-g09d2


From 381a229209aa6f7f72375797b7bcfcfe2ae6fcbb Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@o2.pl>
Date: Sat, 10 Feb 2007 01:44:58 -0800
Subject: [PATCH] lockdep: more unlock-on-error fixes

- returns after DEBUG_LOCKS_WARN_ON added in 3 places

- debug_locks checking after lookup_chain_cache() added in
  __lock_acquire()

- locking for testing and changing global variable max_lockdep_depth
  added in __lock_acquire()

From: Ingo Molnar <mingo@elte.hu>

My __lock_acquire() cleanup introduced a locking bug: on SMP systems we'd
release a non-owned graph lock.  Fix this by moving the graph unlock back,
and by leaving the max_lockdep_depth variable update possibly racy.  (We
don't care, it's just statistics.)

Also add some minimal debugging code to graph_unlock()/graph_lock(),
which caught this locking bug.

Signed-off-by: Jarek Poplawski <jarkao2@o2.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/lockdep.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 509efd49540..2d616f4d853 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -70,6 +70,9 @@ static int graph_lock(void)
 
 static inline int graph_unlock(void)
 {
+	if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
+		return DEBUG_LOCKS_WARN_ON(1);
+
 	__raw_spin_unlock(&lockdep_lock);
 	return 0;
 }
@@ -712,6 +715,9 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
 	struct lock_list *entry;
 	int ret;
 
+	if (!__raw_spin_is_locked(&lockdep_lock))
+		return DEBUG_LOCKS_WARN_ON(1);
+
 	if (depth > max_recursion_depth)
 		max_recursion_depth = depth;
 	if (depth >= RECURSION_LIMIT)
@@ -1293,7 +1299,8 @@ out_unlock_set:
 	if (!subclass || force)
 		lock->class_cache = class;
 
-	DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
+	if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
+		return NULL;
 
 	return class;
 }
@@ -1308,7 +1315,8 @@ static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class)
 	struct list_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
 
-	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
 	/*
 	 * We can walk it lock-free, because entries only get added
 	 * to the hash:
@@ -1394,7 +1402,9 @@ static void check_chain_key(struct task_struct *curr)
 			return;
 		}
 		id = hlock->class - lock_classes;
-		DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
+		if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+			return;
+
 		if (prev_hlock && (prev_hlock->irq_context !=
 							hlock->irq_context))
 			chain_key = 0;
@@ -2205,7 +2215,11 @@ out_calc_hash:
 			if (!check_prevs_add(curr, hlock))
 				return 0;
 		graph_unlock();
-	}
+	} else
+		/* after lookup_chain_cache(): */
+		if (unlikely(!debug_locks))
+			return 0;
+
 	curr->lockdep_depth++;
 	check_chain_key(curr);
 	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
@@ -2214,6 +2228,7 @@ out_calc_hash:
 		printk("turning off the locking correctness validator.\n");
 		return 0;
 	}
+
 	if (unlikely(curr->lockdep_depth > max_lockdep_depth))
 		max_lockdep_depth = curr->lockdep_depth;
 
-- 
cgit v1.2.3-70-g09d2


From 068135e63518314d4efd711142f674ad0841599e Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Sat, 10 Feb 2007 01:44:59 -0800
Subject: [PATCH] lockdep: add graph depth information to /proc/lockdep

Generate locking graph information into /proc/lockdep, for lock hierarchy
documentation and visualization purposes.

sample output:

 c089fd5c OPS:     138 FD:   14 BD:    1 --..: &tty->termios_mutex
  -> [c07a3430] tty_ldisc_lock
  -> [c07a37f0] &port_lock_key
  -> [c07afdc0] &rq->rq_lock_key#2

The lock classes listed are all the first-hop lock dependencies that
lockdep has seen so far.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h |  1 +
 kernel/lockdep.c        | 19 ++++++++++++-------
 kernel/lockdep_proc.c   | 41 +++++++++++++++++++++++++++++------------
 3 files changed, 42 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ea097dddc44..7e1160dde5e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -132,6 +132,7 @@ struct lock_list {
 	struct list_head		entry;
 	struct lock_class		*class;
 	struct stack_trace		trace;
+	int				distance;
 };
 
 /*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2d616f4d853..592c576d77a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -490,7 +490,7 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
  * Add a new dependency to the head of the list:
  */
 static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
-			    struct list_head *head, unsigned long ip)
+			    struct list_head *head, unsigned long ip, int distance)
 {
 	struct lock_list *entry;
 	/*
@@ -502,6 +502,7 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 		return 0;
 
 	entry->class = this;
+	entry->distance = distance;
 	if (!save_trace(&entry->trace))
 		return 0;
 
@@ -906,7 +907,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next)
+	       struct held_lock *next, int distance)
 {
 	struct lock_list *entry;
 	int ret;
@@ -984,8 +985,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 *  L2 added to its dependency list, due to the first chain.)
 	 */
 	list_for_each_entry(entry, &prev->class->locks_after, entry) {
-		if (entry->class == next->class)
+		if (entry->class == next->class) {
+			if (distance == 1)
+				entry->distance = 1;
 			return 2;
+		}
 	}
 
 	/*
@@ -993,12 +997,13 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * to the previous lock's dependency list:
 	 */
 	ret = add_lock_to_list(prev->class, next->class,
-			       &prev->class->locks_after, next->acquire_ip);
+			       &prev->class->locks_after, next->acquire_ip, distance);
+
 	if (!ret)
 		return 0;
 
 	ret = add_lock_to_list(next->class, prev->class,
-			       &next->class->locks_before, next->acquire_ip);
+			       &next->class->locks_before, next->acquire_ip, distance);
 	if (!ret)
 		return 0;
 
@@ -1046,13 +1051,14 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		goto out_bug;
 
 	for (;;) {
+		int distance = curr->lockdep_depth - depth + 1;
 		hlock = curr->held_locks + depth-1;
 		/*
 		 * Only non-recursive-read entries get new dependencies
 		 * added:
 		 */
 		if (hlock->read != 2) {
-			if (!check_prev_add(curr, hlock, next))
+			if (!check_prev_add(curr, hlock, next, distance))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -2779,4 +2785,3 @@ void debug_show_held_locks(struct task_struct *task)
 }
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
-
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index b554b40a4aa..57a547a2da3 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -77,12 +77,29 @@ static unsigned long count_backward_deps(struct lock_class *class)
 	return ret;
 }
 
+static void print_name(struct seq_file *m, struct lock_class *class)
+{
+	char str[128];
+	const char *name = class->name;
+
+	if (!name) {
+		name = __get_key_name(class->key, str);
+		seq_printf(m, "%s", name);
+	} else{
+		seq_printf(m, "%s", name);
+		if (class->name_version > 1)
+			seq_printf(m, "#%d", class->name_version);
+		if (class->subclass)
+			seq_printf(m, "/%d", class->subclass);
+	}
+}
+
 static int l_show(struct seq_file *m, void *v)
 {
 	unsigned long nr_forward_deps, nr_backward_deps;
 	struct lock_class *class = m->private;
-	char str[128], c1, c2, c3, c4;
-	const char *name;
+	struct lock_list *entry;
+	char c1, c2, c3, c4;
 
 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
@@ -97,16 +114,16 @@ static int l_show(struct seq_file *m, void *v)
 	get_usage_chars(class, &c1, &c2, &c3, &c4);
 	seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
 
-	name = class->name;
-	if (!name) {
-		name = __get_key_name(class->key, str);
-		seq_printf(m, ": %s", name);
-	} else{
-		seq_printf(m, ": %s", name);
-		if (class->name_version > 1)
-			seq_printf(m, "#%d", class->name_version);
-		if (class->subclass)
-			seq_printf(m, "/%d", class->subclass);
+	seq_printf(m, ": ");
+	print_name(m, class);
+	seq_puts(m, "\n");
+
+	list_for_each_entry(entry, &class->locks_after, entry) {
+		if (entry->distance == 1) {
+			seq_printf(m, " -> [%p] ", entry->class);
+			print_name(m, entry->class);
+			seq_puts(m, "\n");
+		}
 	}
 	seq_puts(m, "\n");
 
-- 
cgit v1.2.3-70-g09d2


From c376222960ae91d5ffb9197ee36771aaed1d9f90 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sat, 10 Feb 2007 01:45:03 -0800
Subject: [PATCH] Transform kmem_cache_alloc()+memset(0) ->
 kmem_cache_zalloc().

Replace appropriate pairs of "kmem_cache_alloc()" + "memset(0)" with the
corresponding "kmem_cache_zalloc()" call.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Roland McGrath <roland@redhat.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Cc: Greg KH <greg@kroah.com>
Acked-by: Joel Becker <Joel.Becker@oracle.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Jan Kara <jack@ucw.cz>
Cc: Michael Halcrow <mhalcrow@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/binfmt_elf32.c          | 13 ++++---------
 arch/ia64/kernel/perfmon.c             |  3 +--
 arch/ia64/mm/init.c                    |  6 ++----
 arch/x86_64/ia32/ia32_binfmt.c         |  4 +---
 drivers/infiniband/hw/ehca/ehca_cq.c   |  3 +--
 drivers/infiniband/hw/ehca/ehca_mrmw.c |  6 ++----
 drivers/infiniband/hw/ehca/ehca_pd.c   |  3 +--
 drivers/infiniband/hw/ehca/ehca_qp.c   |  3 +--
 drivers/scsi/aic94xx/aic94xx_hwi.c     |  3 +--
 drivers/scsi/scsi_lib.c                |  3 +--
 drivers/usb/host/hc_crisv10.c          |  3 +--
 drivers/usb/host/uhci-q.c              |  4 +---
 fs/aio.c                               |  3 +--
 fs/configfs/dir.c                      |  3 +--
 fs/dlm/memory.c                        |  4 +---
 fs/dquot.c                             |  3 +--
 fs/ecryptfs/crypto.c                   |  4 ++--
 fs/ecryptfs/file.c                     |  3 +--
 fs/ecryptfs/inode.c                    |  5 ++---
 fs/ecryptfs/keystore.c                 |  4 +---
 fs/ecryptfs/main.c                     |  8 ++------
 fs/exec.c                              |  4 +---
 fs/gfs2/meta_io.c                      |  3 +--
 fs/namespace.c                         |  3 +--
 fs/smbfs/request.c                     |  3 +--
 fs/sysfs/dir.c                         |  3 +--
 include/scsi/libsas.h                  |  3 +--
 kernel/posix-timers.c                  |  3 +--
 net/core/dst.c                         |  3 +--
 net/core/neighbour.c                   |  4 +---
 net/decnet/dn_table.c                  |  4 +---
 net/ipv4/ipmr.c                        |  6 ++----
 net/ipv4/ipvs/ip_vs_conn.c             |  3 +--
 net/ipv4/netfilter/ip_conntrack_core.c |  3 +--
 net/ipv6/ip6_fib.c                     |  3 +--
 net/sctp/sm_make_chunk.c               |  3 +--
 security/selinux/avc.c                 |  3 +--
 security/selinux/hooks.c               |  3 +--
 security/selinux/ss/avtab.c            |  3 +--
 39 files changed, 48 insertions(+), 103 deletions(-)

(limited to 'kernel')

diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 578737ec762..c05bda66236 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -91,9 +91,8 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * it with privilege level 3 because the IVE uses non-privileged accesses to these
 	 * tables.  IA-32 segmentation is used to protect against IA-32 accesses to them.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
 		vma->vm_start = IA32_GDT_OFFSET;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -117,9 +116,8 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * code is locked in specific gate page, which is pointed by pretcode
 	 * when setup_frame_ia32
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
 		vma->vm_start = IA32_GATE_OFFSET;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -142,9 +140,8 @@ ia64_elf32_init (struct pt_regs *regs)
 	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
 	 * until a task modifies them via modify_ldt().
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
 		vma->vm_start = IA32_LDT_OFFSET;
 		vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
@@ -214,12 +211,10 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
-	memset(mpnt, 0, sizeof(*mpnt));
-
 	down_write(&current->mm->mmap_sem);
 	{
 		mpnt->vm_mm = current->mm;
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index aa94f60fa8e..86e144f321f 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2301,12 +2301,11 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
-	memset(vma, 0, sizeof(*vma));
 
 	/*
 	 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 760dda4d5b6..f225dd72968 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -176,9 +176,8 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		memset(vma, 0, sizeof(*vma));
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -195,9 +194,8 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
-			memset(vma, 0, sizeof(*vma));
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 5ce0bd486bb..6efe04f3cbc 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -300,12 +300,10 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt) 
 		return -ENOMEM; 
 
-	memset(mpnt, 0, sizeof(*mpnt));
-
 	down_write(&mm->mmap_sem);
 	{
 		mpnt->vm_mm = mm;
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 9291a86ca05..6ebfa27e4e1 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -134,14 +134,13 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
 		return ERR_PTR(-EINVAL);
 
-	my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL);
+	my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
 	if (!my_cq) {
 		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
 			 device);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(my_cq, 0, sizeof(struct ehca_cq));
 	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
 
 	spin_lock_init(&my_cq->spinlock);
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index cfb362a1029..d22ab563633 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -53,9 +53,8 @@ static struct ehca_mr *ehca_mr_new(void)
 {
 	struct ehca_mr *me;
 
-	me = kmem_cache_alloc(mr_cache, GFP_KERNEL);
+	me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
 	if (me) {
-		memset(me, 0, sizeof(struct ehca_mr));
 		spin_lock_init(&me->mrlock);
 	} else
 		ehca_gen_err("alloc failed");
@@ -72,9 +71,8 @@ static struct ehca_mw *ehca_mw_new(void)
 {
 	struct ehca_mw *me;
 
-	me = kmem_cache_alloc(mw_cache, GFP_KERNEL);
+	me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
 	if (me) {
-		memset(me, 0, sizeof(struct ehca_mw));
 		spin_lock_init(&me->mwlock);
 	} else
 		ehca_gen_err("alloc failed");
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index d5345e5b3cd..79d0591a804 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -50,14 +50,13 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
 {
 	struct ehca_pd *pd;
 
-	pd = kmem_cache_alloc(pd_cache, GFP_KERNEL);
+	pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
 	if (!pd) {
 		ehca_err(device, "device=%p context=%p out of memory",
 			 device, context);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(pd, 0, sizeof(struct ehca_pd));
 	pd->ownpid = current->tgid;
 
 	/*
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 95efef921f1..df0516f2437 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -450,13 +450,12 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 	if (pd->uobject && udata)
 		context = pd->uobject->context;
 
-	my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL);
+	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(my_qp, 0, sizeof(struct ehca_qp));
 	memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
 	spin_lock_init(&my_qp->spinlock_s);
 	spin_lock_init(&my_qp->spinlock_r);
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
index da94e126ca8..0cd7eed9196 100644
--- a/drivers/scsi/aic94xx/aic94xx_hwi.c
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -1052,10 +1052,9 @@ static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha,
 	struct asd_ascb *ascb;
 	unsigned long flags;
 
-	ascb = kmem_cache_alloc(asd_ascb_cache, gfp_flags);
+	ascb = kmem_cache_zalloc(asd_ascb_cache, gfp_flags);
 
 	if (ascb) {
-		memset(ascb, 0, sizeof(*ascb));
 		ascb->dma_scb.size = sizeof(struct scb);
 		ascb->dma_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool,
 						     gfp_flags,
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f02f48a882a..a1cd6e6a292 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -388,10 +388,9 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	int err = 0;
 	int write = (data_direction == DMA_TO_DEVICE);
 
-	sioc = kmem_cache_alloc(scsi_io_context_cache, gfp);
+	sioc = kmem_cache_zalloc(scsi_io_context_cache, gfp);
 	if (!sioc)
 		return DRIVER_ERROR << 24;
-	memset(sioc, 0, sizeof(*sioc));
 
 	req = blk_get_request(sdev->request_queue, write, gfp);
 	if (!req)
diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c
index 282d82efc0b..f0ffb8907f2 100644
--- a/drivers/usb/host/hc_crisv10.c
+++ b/drivers/usb/host/hc_crisv10.c
@@ -2163,9 +2163,8 @@ static void etrax_usb_add_to_bulk_sb_list(struct urb *urb, int epid)
 
 	maxlen = usb_maxpacket(urb->dev, urb->pipe, usb_pipeout(urb->pipe));
 
-	sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, SLAB_FLAG);
+	sb_desc = kmem_cache_zalloc(usb_desc_cache, SLAB_FLAG);
 	assert(sb_desc != NULL);
-	memset(sb_desc, 0, sizeof(USB_SB_Desc_t));
 
 
 	if (usb_pipeout(urb->pipe)) {
diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c
index 2cbb239e63f..68e66b33e72 100644
--- a/drivers/usb/host/uhci-q.c
+++ b/drivers/usb/host/uhci-q.c
@@ -624,12 +624,10 @@ static inline struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci,
 {
 	struct urb_priv *urbp;
 
-	urbp = kmem_cache_alloc(uhci_up_cachep, GFP_ATOMIC);
+	urbp = kmem_cache_zalloc(uhci_up_cachep, GFP_ATOMIC);
 	if (!urbp)
 		return NULL;
 
-	memset((void *)urbp, 0, sizeof(*urbp));
-
 	urbp->urb = urb;
 	urb->hcpriv = urbp;
 	
diff --git a/fs/aio.c b/fs/aio.c
index ee662589e5e..0b4ee0a5c83 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -211,11 +211,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if ((unsigned long)nr_events > aio_max_nr)
 		return ERR_PTR(-EAGAIN);
 
-	ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	memset(ctx, 0, sizeof(*ctx));
 	ctx->max_reqs = nr_events;
 	mm = ctx->mm = current->mm;
 	atomic_inc(&mm->mm_count);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1814ba44680..9371ee20995 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -72,11 +72,10 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 {
 	struct configfs_dirent * sd;
 
-	sd = kmem_cache_alloc(configfs_dir_cachep, GFP_KERNEL);
+	sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
 		return NULL;
 
-	memset(sd, 0, sizeof(*sd));
 	atomic_set(&sd->s_count, 1);
 	INIT_LIST_HEAD(&sd->s_links);
 	INIT_LIST_HEAD(&sd->s_children);
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5352b03ff5a..f858fef6e41 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -76,9 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb;
 
-	lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL);
-	if (lkb)
-		memset(lkb, 0, sizeof(*lkb));
+	lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
 	return lkb;
 }
 
diff --git a/fs/dquot.c b/fs/dquot.c
index 0952cc474d9..a561fb29e20 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -600,11 +600,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 
-	dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS);
+	dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
 	if(!dquot)
 		return NODQUOT;
 
-	memset((caddr_t)dquot, 0, sizeof(struct dquot));
 	mutex_init(&dquot->dq_lock);
 	INIT_LIST_HEAD(&dquot->dq_free);
 	INIT_LIST_HEAD(&dquot->dq_inuse);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a86a55ccf87..75bbfae5508 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1332,13 +1332,13 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
 		goto out;
 	}
 	/* Released in this function */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER);
+	page_virt = kmem_cache_zalloc(ecryptfs_header_cache_0, GFP_USER);
 	if (!page_virt) {
 		ecryptfs_printk(KERN_ERR, "Out of memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(page_virt, 0, PAGE_CACHE_SIZE);
+
 	rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
 					 ecryptfs_dentry);
 	if (unlikely(rc)) {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c5a2e5298f1..779c3477d93 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -251,7 +251,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	int lower_flags;
 
 	/* Released in ecryptfs_release or end of function if failure */
-	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL);
+	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
 	if (!file_info) {
 		ecryptfs_printk(KERN_ERR,
@@ -259,7 +259,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(file_info, 0, sizeof(*file_info));
 	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
 	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f5e5076ae..d4f02f3e18d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -361,8 +361,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 	/* Released in this function */
-	page_virt =
-	    (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
+	page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2,
 				     GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
@@ -370,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 				"Cannot ecryptfs_kmalloc a page\n");
 		goto out_dput;
 	}
-	memset(page_virt, 0, PAGE_CACHE_SIZE);
+
 	rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt);
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 745c0f1bfbb..80bccd5ff8e 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -207,14 +207,12 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
 	 * at end of function upon failure */
 	auth_tok_list_item =
-	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
+	    kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
 	if (!auth_tok_list_item) {
 		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(auth_tok_list_item, 0,
-	       sizeof(struct ecryptfs_auth_tok_list_item));
 	(*new_auth_tok) = &auth_tok_list_item->auth_tok;
 
 	/* check for body size - one to two bytes */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d0541ae8fab..fe41ab1566e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -378,15 +378,13 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
-					kmem_cache_alloc(ecryptfs_sb_info_cache,
+					kmem_cache_zalloc(ecryptfs_sb_info_cache,
 							 GFP_KERNEL));
 	if (!ecryptfs_superblock_to_private(sb)) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(ecryptfs_superblock_to_private(sb), 0,
-	       sizeof(struct ecryptfs_sb_info));
 	sb->s_op = &ecryptfs_sops;
 	/* Released through deactivate_super(sb) from get_sb_nodev */
 	sb->s_root = d_alloc(NULL, &(const struct qstr) {
@@ -402,7 +400,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* Released in d_release when dput(sb->s_root) is called */
 	/* through deactivate_super(sb) from get_sb_nodev() */
 	ecryptfs_set_dentry_private(sb->s_root,
-				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
+				    kmem_cache_zalloc(ecryptfs_dentry_info_cache,
 						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(sb->s_root)) {
 		ecryptfs_printk(KERN_ERR,
@@ -410,8 +408,6 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(ecryptfs_dentry_to_private(sb->s_root), 0,
-	       sizeof(struct ecryptfs_dentry_info));
 	rc = 0;
 out:
 	/* Should be able to rely on deactivate_super called from
diff --git a/fs/exec.c b/fs/exec.c
index 11fe93f7363..7e36c6f6f53 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -405,12 +405,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
-	memset(mpnt, 0, sizeof(*mpnt));
-
 	down_write(&mm->mmap_sem);
 	{
 		mpnt->vm_mm = mm;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0e34d991897..e62d4f620c5 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -282,8 +282,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
 		return;
 	}
 
-	bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
-	memset(bd, 0, sizeof(struct gfs2_bufdata));
+	bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
 	bd->bd_bh = bh;
 	bd->bd_gl = gl;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 5ef336c1103..fd999cab7b5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,9 +53,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
-	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
+	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
-		memset(mnt, 0, sizeof(struct vfsmount));
 		atomic_set(&mnt->mnt_count, 1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index a4bcae8a9af..42261dbdf60 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
 	struct smb_request *req;
 	unsigned char *buf = NULL;
 
-	req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
+	req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
 	VERBOSE("allocating request: %p\n", req);
 	if (!req)
 		goto out;
@@ -74,7 +74,6 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
 		}
 	}
 
-	memset(req, 0, sizeof(struct smb_request));
 	req->rq_buffer = buf;
 	req->rq_bufsize = bufsize;
 	req->rq_server = server;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 9dcdf556c99..9e95e7abaf6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -37,11 +37,10 @@ static struct sysfs_dirent * __sysfs_new_dirent(void * element)
 {
 	struct sysfs_dirent * sd;
 
-	sd = kmem_cache_alloc(sysfs_dir_cachep, GFP_KERNEL);
+	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
 		return NULL;
 
-	memset(sd, 0, sizeof(*sd));
 	atomic_set(&sd->s_count, 1);
 	atomic_set(&sd->s_event, 1);
 	INIT_LIST_HEAD(&sd->s_children);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 0c775fceb67..0689e004a28 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -558,10 +558,9 @@ struct sas_task {
 static inline struct sas_task *sas_alloc_task(gfp_t flags)
 {
 	extern struct kmem_cache *sas_task_cache;
-	struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags);
+	struct sas_task *task = kmem_cache_zalloc(sas_task_cache, flags);
 
 	if (task) {
-		memset(task, 0, sizeof(*task));
 		INIT_LIST_HEAD(&task->list);
 		spin_lock_init(&task->task_state_lock);
 		task->task_state_flags = SAS_TASK_STATE_PENDING;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5fe87de10ff..a1bf6161783 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -399,10 +399,9 @@ EXPORT_SYMBOL_GPL(register_posix_clock);
 static struct k_itimer * alloc_posix_timer(void)
 {
 	struct k_itimer *tmr;
-	tmr = kmem_cache_alloc(posix_timers_cache, GFP_KERNEL);
+	tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
 	if (!tmr)
 		return tmr;
-	memset(tmr, 0, sizeof (struct k_itimer));
 	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
 		kmem_cache_free(posix_timers_cache, tmr);
 		tmr = NULL;
diff --git a/net/core/dst.c b/net/core/dst.c
index 1a53fb39b7e..f9eace78d35 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -132,10 +132,9 @@ void * dst_alloc(struct dst_ops * ops)
 		if (ops->gc())
 			return NULL;
 	}
-	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
+	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
 	if (!dst)
 		return NULL;
-	memset(dst, 0, ops->entry_size);
 	atomic_set(&dst->__refcnt, 0);
 	dst->ops = ops;
 	dst->lastuse = jiffies;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 054d46493d2..efb673ad185 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -251,12 +251,10 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 			goto out_entries;
 	}
 
-	n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC);
+	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
 	if (!n)
 		goto out_entries;
 
-	memset(n, 0, tbl->entry_size);
-
 	skb_queue_head_init(&n->arp_queue);
 	rwlock_init(&n->lock);
 	n->updated	  = n->used = now;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index c1f0cc1b1c6..720501e1ae2 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -593,12 +593,10 @@ create:
 
 replace:
 	err = -ENOBUFS;
-	new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL);
+	new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL);
 	if (new_f == NULL)
 		goto out;
 
-	memset(new_f, 0, sizeof(struct dn_fib_node));
-
 	new_f->fn_key = key;
 	new_f->fn_type = type;
 	new_f->fn_scope = r->rtm_scope;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ecb5422ea23..d7e1e60f51d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -479,20 +479,18 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
  */
 static struct mfc_cache *ipmr_cache_alloc(void)
 {
-	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
+	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if(c==NULL)
 		return NULL;
-	memset(c, 0, sizeof(*c));
 	c->mfc_un.res.minvif = MAXVIFS;
 	return c;
 }
 
 static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
-	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
+	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if(c==NULL)
 		return NULL;
-	memset(c, 0, sizeof(*c));
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
 	return c;
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 8086787a2c5..3aec4ac66e3 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -603,13 +603,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
 
-	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
 		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
 		return NULL;
 	}
 
-	memset(cp, 0, sizeof(*cp));
 	INIT_LIST_HEAD(&cp->c_list);
 	init_timer(&cp->timer);
 	cp->timer.data     = (unsigned long)cp;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 8556a4f4f60..62be2eb3769 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -638,14 +638,13 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 		}
 	}
 
-	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
 		atomic_dec(&ip_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(conntrack, 0, sizeof(*conntrack));
 	atomic_set(&conntrack->ct_general.use, 1);
 	conntrack->ct_general.destroy = destroy_conntrack;
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 96d8310ae9c..827f8842b57 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -150,8 +150,7 @@ static __inline__ struct fib6_node * node_alloc(void)
 {
 	struct fib6_node *fn;
 
-	if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL)
-		memset(fn, 0, sizeof(struct fib6_node));
+	fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
 
 	return fn;
 }
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 0b1ddb1005a..78348186017 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -979,11 +979,10 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
 {
 	struct sctp_chunk *retval;
 
-	retval = kmem_cache_alloc(sctp_chunk_cachep, GFP_ATOMIC);
+	retval = kmem_cache_zalloc(sctp_chunk_cachep, GFP_ATOMIC);
 
 	if (!retval)
 		goto nodata;
-	memset(retval, 0, sizeof(struct sctp_chunk));
 
 	if (!sk) {
 		SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb);
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index e7c0b5e2066..da8caf10ef9 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -332,11 +332,10 @@ static struct avc_node *avc_alloc_node(void)
 {
 	struct avc_node *node;
 
-	node = kmem_cache_alloc(avc_node_cachep, GFP_ATOMIC);
+	node = kmem_cache_zalloc(avc_node_cachep, GFP_ATOMIC);
 	if (!node)
 		goto out;
 
-	memset(node, 0, sizeof(*node));
 	INIT_RCU_HEAD(&node->rhead);
 	INIT_LIST_HEAD(&node->list);
 	atomic_set(&node->ae.used, 1);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 65fb5e8ea94..9eeab82719a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -181,11 +181,10 @@ static int inode_alloc_security(struct inode *inode)
 	struct task_security_struct *tsec = current->security;
 	struct inode_security_struct *isec;
 
-	isec = kmem_cache_alloc(sel_inode_cache, GFP_KERNEL);
+	isec = kmem_cache_zalloc(sel_inode_cache, GFP_KERNEL);
 	if (!isec)
 		return -ENOMEM;
 
-	memset(isec, 0, sizeof(*isec));
 	mutex_init(&isec->lock);
 	INIT_LIST_HEAD(&isec->list);
 	isec->inode = inode;
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index ebb993c5c24..9142073319c 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -36,10 +36,9 @@ avtab_insert_node(struct avtab *h, int hvalue,
 		  struct avtab_key *key, struct avtab_datum *datum)
 {
 	struct avtab_node * newnode;
-	newnode = kmem_cache_alloc(avtab_node_cachep, GFP_KERNEL);
+	newnode = kmem_cache_zalloc(avtab_node_cachep, GFP_KERNEL);
 	if (newnode == NULL)
 		return NULL;
-	memset(newnode, 0, sizeof(struct avtab_node));
 	newnode->key = *key;
 	newnode->datum = *datum;
 	if (prev) {
-- 
cgit v1.2.3-70-g09d2


From 23c887522e912ca494950796a95df8dd210f4b01 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sat, 10 Feb 2007 01:45:05 -0800
Subject: [PATCH] Relay: add CPU hotplug support

Mathieu originally needed to add this for tracing Xen, but it's something
that's needed for any application that can be tracing while cpus are added.

unplug isn't supported by this patch.  The thought was that at minimum a new
buffer needs to be added when a cpu comes up, but it wasn't worth the effort
to remove buffers on cpu down since they'd be freed soon anyway when the
channel was closed.

[zanussi@us.ibm.com: avoid lock_cpu_hotplug deadlock]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Tom Zanussi <zanussi@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/relay.txt |   9 +-
 block/blktrace.c                    |   3 +-
 include/linux/relay.h               |   9 +-
 kernel/relay.c                      | 180 +++++++++++++++++++++++++-----------
 4 files changed, 142 insertions(+), 59 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
index d6788dae034..7fbb6ffe576 100644
--- a/Documentation/filesystems/relay.txt
+++ b/Documentation/filesystems/relay.txt
@@ -157,7 +157,7 @@ TBD(curr. line MT:/API/)
   channel management functions:
 
     relay_open(base_filename, parent, subbuf_size, n_subbufs,
-               callbacks)
+               callbacks, private_data)
     relay_close(chan)
     relay_flush(chan)
     relay_reset(chan)
@@ -251,7 +251,7 @@ static struct rchan_callbacks relay_callbacks =
 
 And an example relay_open() invocation using them:
 
-  chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks);
+  chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL);
 
 If the create_buf_file() callback fails, or isn't defined, channel
 creation and thus relay_open() will fail.
@@ -289,6 +289,11 @@ they use the proper locking for such a buffer, either by wrapping
 writes in a spinlock, or by copying a write function from relay.h and
 creating a local version that internally does the proper locking.
 
+The private_data passed into relay_open() allows clients to associate
+user-defined data with a channel, and is immediately available
+(including in create_buf_file()) via chan->private_data or
+buf->chan->private_data.
+
 Channel 'modes'
 ---------------
 
diff --git a/block/blktrace.c b/block/blktrace.c
index d3679dd1d22..d36b32ed22f 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -363,10 +363,9 @@ static int blk_trace_setup(request_queue_t *q, struct block_device *bdev,
 	if (!bt->dropped_file)
 		goto err;
 
-	bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
+	bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks, bt);
 	if (!bt->rchan)
 		goto err;
-	bt->rchan->private_data = bt;
 
 	bt->act_mask = buts.act_mask;
 	if (!bt->act_mask)
diff --git a/include/linux/relay.h b/include/linux/relay.h
index c6a48bfc8b1..759a0f97bec 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -24,7 +24,7 @@
 /*
  * Tracks changes to rchan/rchan_buf structs
  */
-#define RELAYFS_CHANNEL_VERSION		6
+#define RELAYFS_CHANNEL_VERSION		7
 
 /*
  * Per-cpu relay channel buffer
@@ -64,6 +64,10 @@ struct rchan
 	void *private_data;		/* for user-defined data */
 	size_t last_toobig;		/* tried to log event > subbuf size */
 	struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
+	int is_global;			/* One global buffer ? */
+	struct list_head list;		/* for channel list */
+	struct dentry *parent;		/* parent dentry passed to open */
+	char base_filename[NAME_MAX];	/* saved base filename */
 };
 
 /*
@@ -162,7 +166,8 @@ struct rchan *relay_open(const char *base_filename,
 			 struct dentry *parent,
 			 size_t subbuf_size,
 			 size_t n_subbufs,
-			 struct rchan_callbacks *cb);
+			 struct rchan_callbacks *cb,
+			 void *private_data);
 extern void relay_close(struct rchan *chan);
 extern void relay_flush(struct rchan *chan);
 extern void relay_subbufs_consumed(struct rchan *chan,
diff --git a/kernel/relay.c b/kernel/relay.c
index 284e2e8b4ee..ef923f6de2e 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -7,6 +7,8 @@
  * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
  *
  * Moved to kernel/relay.c by Paul Mundt, 2006.
+ * November 2006 - CPU hotplug support by Mathieu Desnoyers
+ * 	(mathieu.desnoyers@polymtl.ca)
  *
  * This file is released under the GPL.
  */
@@ -18,6 +20,11 @@
 #include <linux/relay.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <linux/cpu.h>
+
+/* list of open channels, for cpu hotplug */
+static DEFINE_MUTEX(relay_channels_mutex);
+static LIST_HEAD(relay_channels);
 
 /*
  * close() vm_op implementation for relay file mapping.
@@ -187,6 +194,7 @@ void relay_destroy_buf(struct rchan_buf *buf)
 			__free_page(buf->page_array[i]);
 		kfree(buf->page_array);
 	}
+	chan->buf[buf->cpu] = NULL;
 	kfree(buf->padding);
 	kfree(buf);
 	kref_put(&chan->kref, relay_destroy_channel);
@@ -362,51 +370,69 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
 void relay_reset(struct rchan *chan)
 {
 	unsigned int i;
-	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		__relay_reset(chan->buf[i], 0);
-		prev = chan->buf[i];
+ 	if (chan->is_global && chan->buf[0]) {
+		__relay_reset(chan->buf[0], 0);
+		return;
 	}
+
+	mutex_lock(&relay_channels_mutex);
+	for_each_online_cpu(i)
+		if (chan->buf[i])
+			__relay_reset(chan->buf[i], 0);
+	mutex_unlock(&relay_channels_mutex);
 }
 EXPORT_SYMBOL_GPL(relay_reset);
 
 /*
  *	relay_open_buf - create a new relay channel buffer
  *
- *	Internal - used by relay_open().
+ *	used by relay_open() and CPU hotplug.
  */
-static struct rchan_buf *relay_open_buf(struct rchan *chan,
-					const char *filename,
-					struct dentry *parent,
-					int *is_global)
+static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
 {
-	struct rchan_buf *buf;
+ 	struct rchan_buf *buf = NULL;
 	struct dentry *dentry;
+ 	char *tmpname;
 
-	if (*is_global)
+ 	if (chan->is_global)
 		return chan->buf[0];
 
+	tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+ 	if (!tmpname)
+ 		goto end;
+ 	snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
+
 	buf = relay_create_buf(chan);
 	if (!buf)
-		return NULL;
+ 		goto free_name;
+
+ 	buf->cpu = cpu;
+ 	__relay_reset(buf, 1);
 
 	/* Create file in fs */
-	dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
-					   buf, is_global);
-	if (!dentry) {
-		relay_destroy_buf(buf);
-		return NULL;
-	}
+ 	dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR,
+ 					   buf, &chan->is_global);
+ 	if (!dentry)
+ 		goto free_buf;
 
 	buf->dentry = dentry;
-	__relay_reset(buf, 1);
 
+ 	if(chan->is_global) {
+ 		chan->buf[0] = buf;
+ 		buf->cpu = 0;
+  	}
+
+ 	goto free_name;
+
+free_buf:
+ 	relay_destroy_buf(buf);
+free_name:
+ 	kfree(tmpname);
+end:
 	return buf;
 }
 
@@ -447,6 +473,47 @@ static void setup_callbacks(struct rchan *chan,
 	chan->cb = cb;
 }
 
+/**
+ *	relay_hotcpu_callback - CPU hotplug callback
+ *	@nb: notifier block
+ *	@action: hotplug action to take
+ *	@hcpu: CPU number
+ *
+ *	Returns NOTIFY_BAD if a buffer for the new CPU cannot be created on
+ *	%CPU_UP_PREPARE, NOTIFY_OK otherwise.
+ */
+static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
+				unsigned long action,
+				void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+	struct rchan *chan;
+
+	switch(action) {
+	case CPU_UP_PREPARE:
+		mutex_lock(&relay_channels_mutex);
+		list_for_each_entry(chan, &relay_channels, list) {
+			if (chan->buf[hotcpu])
+				continue;
+			chan->buf[hotcpu] = relay_open_buf(chan, hotcpu);
+			if(!chan->buf[hotcpu]) {
+				printk(KERN_ERR
+					"relay_hotcpu_callback: cpu %d buffer "
+					"creation failed\n", hotcpu);
+				mutex_unlock(&relay_channels_mutex);
+				return NOTIFY_BAD;
+			}
+		}
+		mutex_unlock(&relay_channels_mutex);
+		break;
+	case CPU_DEAD:
+		/* No need to flush the cpu: its buffer is kept and will be
+		 * flushed upon the final relay_flush() call. */
+		break;
+	}
+	return NOTIFY_OK;
+}
+
 /**
  *	relay_open - create a new relay channel
  *	@base_filename: base name of files to create
@@ -454,6 +521,7 @@ static void setup_callbacks(struct rchan *chan,
  *	@subbuf_size: size of sub-buffers
  *	@n_subbufs: number of sub-buffers
  *	@cb: client callback functions
+ *	@private_data: user-defined data
  *
  *	Returns channel pointer if successful, %NULL otherwise.
  *
@@ -466,13 +534,11 @@ struct rchan *relay_open(const char *base_filename,
 			 struct dentry *parent,
 			 size_t subbuf_size,
 			 size_t n_subbufs,
-			 struct rchan_callbacks *cb)
+			 struct rchan_callbacks *cb,
+			 void *private_data)
 {
 	unsigned int i;
 	struct rchan *chan;
-	char *tmpname;
-	int is_global = 0;
-
 	if (!base_filename)
 		return NULL;
 
@@ -487,38 +553,32 @@ struct rchan *relay_open(const char *base_filename,
 	chan->n_subbufs = n_subbufs;
 	chan->subbuf_size = subbuf_size;
 	chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
+	chan->parent = parent;
+	chan->private_data = private_data;
+	strlcpy(chan->base_filename, base_filename, NAME_MAX);
 	setup_callbacks(chan, cb);
 	kref_init(&chan->kref);
 
-	tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-	if (!tmpname)
-		goto free_chan;
-
+	mutex_lock(&relay_channels_mutex);
 	for_each_online_cpu(i) {
-		sprintf(tmpname, "%s%d", base_filename, i);
-		chan->buf[i] = relay_open_buf(chan, tmpname, parent,
-					      &is_global);
+		chan->buf[i] = relay_open_buf(chan, i);
 		if (!chan->buf[i])
 			goto free_bufs;
-
-		chan->buf[i]->cpu = i;
 	}
+	list_add(&chan->list, &relay_channels);
+	mutex_unlock(&relay_channels_mutex);
 
-	kfree(tmpname);
 	return chan;
 
 free_bufs:
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_online_cpu(i) {
 		if (!chan->buf[i])
 			break;
 		relay_close_buf(chan->buf[i]);
-		if (is_global)
-			break;
 	}
-	kfree(tmpname);
 
-free_chan:
 	kref_put(&chan->kref, relay_destroy_channel);
+	mutex_unlock(&relay_channels_mutex);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(relay_open);
@@ -619,24 +679,26 @@ EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
 void relay_close(struct rchan *chan)
 {
 	unsigned int i;
-	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		relay_close_buf(chan->buf[i]);
-		prev = chan->buf[i];
-	}
+	mutex_lock(&relay_channels_mutex);
+	if (chan->is_global && chan->buf[0])
+		relay_close_buf(chan->buf[0]);
+	else
+		for_each_possible_cpu(i)
+			if (chan->buf[i])
+				relay_close_buf(chan->buf[i]);
 
 	if (chan->last_toobig)
 		printk(KERN_WARNING "relay: one or more items not logged "
 		       "[item size (%Zd) > sub-buffer size (%Zd)]\n",
 		       chan->last_toobig, chan->subbuf_size);
 
+	list_del(&chan->list);
 	kref_put(&chan->kref, relay_destroy_channel);
+	mutex_unlock(&relay_channels_mutex);
 }
 EXPORT_SYMBOL_GPL(relay_close);
 
@@ -649,17 +711,20 @@ EXPORT_SYMBOL_GPL(relay_close);
 void relay_flush(struct rchan *chan)
 {
 	unsigned int i;
-	struct rchan_buf *prev = NULL;
 
 	if (!chan)
 		return;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!chan->buf[i] || chan->buf[i] == prev)
-			break;
-		relay_switch_subbuf(chan->buf[i], 0);
-		prev = chan->buf[i];
+	if (chan->is_global && chan->buf[0]) {
+		relay_switch_subbuf(chan->buf[0], 0);
+		return;
 	}
+
+	mutex_lock(&relay_channels_mutex);
+	for_each_possible_cpu(i)
+		if (chan->buf[i])
+			relay_switch_subbuf(chan->buf[i], 0);
+	mutex_unlock(&relay_channels_mutex);
 }
 EXPORT_SYMBOL_GPL(relay_flush);
 
@@ -1022,3 +1087,12 @@ const struct file_operations relay_file_operations = {
 	.sendfile       = relay_file_sendfile,
 };
 EXPORT_SYMBOL_GPL(relay_file_operations);
+
+static __init int relay_init(void)
+{
+	/* The callback opens channel buffers for CPUs that come up later. */
+	hotcpu_notifier(relay_hotcpu_callback, 0);
+	return 0;
+}
+
+module_init(relay_init);
-- 
cgit v1.2.3-70-g09d2


From b035b6de24932ffd4a2b1c6619a2f5711da6920f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@openvz.org>
Date: Sat, 10 Feb 2007 01:45:10 -0800
Subject: [PATCH] Consolidate default sched_clock()

Use attribute(weak).

Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/time.c     | 11 -----------
 arch/arm/kernel/time.c       | 10 ----------
 arch/arm26/kernel/time.c     |  8 --------
 arch/avr32/kernel/time.c     |  9 ---------
 arch/cris/kernel/time.c      |  8 --------
 arch/h8300/kernel/time.c     |  6 ------
 arch/m32r/kernel/time.c      |  8 --------
 arch/m68k/kernel/time.c      |  9 ---------
 arch/m68knommu/kernel/time.c |  9 ---------
 arch/mips/kernel/time.c      |  5 -----
 arch/parisc/kernel/time.c    | 11 -----------
 arch/sh/kernel/time.c        |  8 --------
 arch/sh64/kernel/time.c      |  9 ---------
 arch/sparc/kernel/time.c     |  9 ---------
 arch/v850/kernel/time.c      |  8 --------
 arch/xtensa/kernel/time.c    |  9 ---------
 kernel/sched.c               | 10 ++++++++++
 17 files changed, 10 insertions(+), 137 deletions(-)

(limited to 'kernel')

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index d7053eb4ffc..4748e14a28b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -90,17 +90,6 @@ static inline __u32 rpcc(void)
     return result;
 }
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- *
- * Copied from ARM code for expediency... ;-}
- */
-unsigned long long sched_clock(void)
-{
-        return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
-
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3c8cdcfe8d4..ee47c532e21 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -77,16 +77,6 @@ static unsigned long dummy_gettimeoffset(void)
 }
 #endif
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is the default implementation.  Sub-architecture
- * implementations can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 /*
  * An implementation of printk_clock() independent from
  * sched_clock().  This avoids non-bootable kernels when
diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c
index 1206469b2b8..0f1d57fbd3d 100644
--- a/arch/arm26/kernel/time.c
+++ b/arch/arm26/kernel/time.c
@@ -89,14 +89,6 @@ static unsigned long gettimeoffset(void)
         return (offset + LATCH/2) / LATCH;
 }
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 static unsigned long next_rtc_update;
 
 /*
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 5a247ba71a7..a2f74affaa9 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -109,15 +109,6 @@ static void avr32_hpt_init(unsigned int count)
 	sysreg_write(COUNT, count);
 }
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	/* There must be better ways...? */
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 /*
  * local_timer_interrupt() does profiling and process accounting on a
  * per-CPU basis.
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index 22a1aa511ae..acfd0455940 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -216,14 +216,6 @@ cris_do_profile(struct pt_regs* regs)
 #endif
 }
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 static int
 __init init_udelay(void)
 {
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 8abab3bc2b6..d1ef615ba89 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -118,9 +118,3 @@ int do_settimeofday(struct timespec *tv)
 }
 
 EXPORT_SYMBOL(do_settimeofday);
-
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-
-}
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index a09038282c7..3858c9f39ba 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -286,11 +286,3 @@ void __init time_init(void)
 #error no chip configuration
 #endif
 }
-
-/*
- *  Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 2a599c3ed78..4c065f9ceff 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -159,12 +159,3 @@ int do_settimeofday(struct timespec *tv)
 }
 
 EXPORT_SYMBOL(do_settimeofday);
-
-/*
- * Scheduler clock - returns current time in ns units.
- */
-unsigned long long sched_clock(void)
-{
-       return (unsigned long long)jiffies*(1000000000/HZ);
-}
-
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index 11ea217ed5c..467053da2d0 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -174,13 +174,4 @@ int do_settimeofday(struct timespec *tv)
 	clock_was_set();
 	return 0;
 }
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 EXPORT_SYMBOL(do_settimeofday);
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 8aa544f73a5..545fcbc8cea 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -455,8 +455,3 @@ EXPORT_SYMBOL(rtc_lock);
 EXPORT_SYMBOL(to_tm);
 EXPORT_SYMBOL(rtc_mips_set_time);
 EXPORT_SYMBOL(rtc_mips_get_time);
-
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies*(1000000000/HZ);
-}
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index bad7d1eb62b..5f1b51af06a 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -288,17 +288,6 @@ do_settimeofday (struct timespec *tv)
 }
 EXPORT_SYMBOL(do_settimeofday);
 
-/*
- * XXX: We can do better than this.
- * Returns nanoseconds
- */
-
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
-
 void __init start_cpu_itimer(void)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index c206c9504c4..d47e775962e 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -41,14 +41,6 @@ static int null_rtc_set_time(const time_t secs)
 void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
 int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long __attribute__ ((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 #ifndef CONFIG_GENERIC_TIME
 void do_gettimeofday(struct timeval *tv)
 {
diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c
index 9c4a38a8698..390b40de7ce 100644
--- a/arch/sh64/kernel/time.c
+++ b/arch/sh64/kernel/time.c
@@ -579,12 +579,3 @@ void enter_deep_standby(void)
 	asm __volatile__ ("nop");
 	panic("Unexpected wakeup!\n");
 }
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c
index 2fcce000d87..9bb1240aaf8 100644
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -436,15 +436,6 @@ static inline unsigned long do_gettimeoffset(void)
 	return (*master_l10_counter >> 10) & 0x1fffff;
 }
 
-/*
- * Returns nanoseconds
- * XXX This is a suboptimal implementation.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 /* Ok, my cute asm atomicity trick doesn't work anymore.
  * There are just too many variables that need to be protected
  * now (both members of xtime, et al.)
diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c
index cd06f47c0ea..486e3a441c8 100644
--- a/arch/v850/kernel/time.c
+++ b/arch/v850/kernel/time.c
@@ -27,14 +27,6 @@
 
 #define TICK_SIZE	(tick_nsec / 1000)
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index a350431363a..22949be4a5d 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -38,15 +38,6 @@ unsigned long ccount_nsec;		/* nsec per ccount increment */
 unsigned int last_ccount_stamp;
 static long last_rtc_update = 0;
 
-/*
- * Scheduler clock - returns current tim in nanosec units.
- */
-
-unsigned long long sched_clock(void)
-{
-	return (unsigned long long)jiffies * (1000000000 / HZ);
-}
-
 static irqreturn_t timer_interrupt(int irq, void *dev_id);
 static struct irqaction timer_irqaction = {
 	.handler =	timer_interrupt,
diff --git a/kernel/sched.c b/kernel/sched.c
index cca93cc0dd7..1cd4ee769e2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -56,6 +56,16 @@
 
 #include <asm/unistd.h>
 
+/*
+ * Scheduler clock - returns the current time in nanoseconds.
+ * This is the default implementation, derived from jiffies; it is weak,
+ * so architectures and sub-architectures can override it.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+	return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
-- 
cgit v1.2.3-70-g09d2


From 34f5a39899f3f3e815da64f48ddb72942d86c366 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 10 Feb 2007 01:45:24 -0800
Subject: [PATCH] Add TAINT_USER and ability to set taint flags from userspace

Allow taint flags to be set from userspace by writing to
/proc/sys/kernel/tainted, and add a new taint flag, TAINT_USER, to be used
when userspace has potentially done something dangerous that might
compromise the kernel.  This will allow support personnel to ask further
questions about what may have caused the user taint flag to have been set.

For example, they might examine the logs of the realtime JVM to see if the
Java program has used the really silly, stupid, dangerous, and
completely-non-portable direct access to physical memory feature which MUST
be implemented according to the Real-Time Specification for Java (RTSJ).
Sigh.  What were those silly people at Sun thinking?

[akpm@osdl.org: build fix]
[bunk@stusta.de: cleanup]
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |  1 +
 kernel/panic.c         |  6 ++++--
 kernel/sysctl.c        | 27 +++++++++++++++++++++++++--
 3 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 63fb18dcac3..e1a429ada97 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -200,6 +200,7 @@ extern enum system_states {
 #define TAINT_FORCED_RMMOD		(1<<3)
 #define TAINT_MACHINE_CHECK		(1<<4)
 #define TAINT_BAD_PAGE			(1<<5)
+#define TAINT_USER			(1<<6)
 
 extern void dump_stack(void);
 
diff --git a/kernel/panic.c b/kernel/panic.c
index 525e365f723..623d1828259 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -150,6 +150,7 @@ EXPORT_SYMBOL(panic);
  *  'R' - User forced a module unload.
  *  'M' - Machine had a machine check experience.
  *  'B' - System has hit bad_page.
+ *  'U' - Userspace-defined naughtiness.
  *
  *	The string is overwritten by the next call to print_taint().
  */
@@ -158,13 +159,14 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
 			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
 			tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
  			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
-			tainted & TAINT_BAD_PAGE ? 'B' : ' ');
+			tainted & TAINT_BAD_PAGE ? 'B' : ' ',
+			tainted & TAINT_USER ? 'U' : ' ');
 	}
 	else
 		snprintf(buf, sizeof(buf), "Not tainted");
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 16ef870fa75..7733ef58aac 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -151,6 +151,8 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
+			       void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 static ctl_table root_table[];
@@ -174,6 +176,7 @@ extern ctl_table inotify_table[];
 int sysctl_legacy_va_layout;
 #endif
 
+
 static void *get_uts(ctl_table *table, int write)
 {
 	char *which = table->data;
@@ -344,14 +347,16 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
 	},
+#ifdef CONFIG_PROC_SYSCTL
 	{
 		.ctl_name	= KERN_TAINTED,
 		.procname	= "tainted",
 		.data		= &tainted,
 		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_taint,
 	},
+#endif
 	{
 		.ctl_name	= KERN_CAP_BSET,
 		.procname	= "cap-bound",
@@ -1927,6 +1932,7 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
 
 #define OP_SET	0
 #define OP_AND	1
+#define OP_OR	2
 
 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
 				      int *valp,
@@ -1938,6 +1944,7 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
 		switch(op) {
 		case OP_SET:	*valp = val; break;
 		case OP_AND:	*valp &= val; break;
+		case OP_OR:	*valp |= val; break;
 		}
 	} else {
 		int val = *valp;
@@ -1970,6 +1977,22 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
 				do_proc_dointvec_bset_conv,&op);
 }
 
+/*
+ *	Taint values can only be increased: written bits are OR-ed into the
+ *	current value.  Writing requires CAP_SYS_ADMIN; reading does not.
+ */
+static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
+			       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int op = OP_OR;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+				do_proc_dointvec_bset_conv,&op);
+}
+
 struct do_proc_dointvec_minmax_conv_param {
 	int *min;
 	int *max;
-- 
cgit v1.2.3-70-g09d2


From 3db5db4fcdafc85b99d171336a7d2f25765ccd13 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 10 Feb 2007 01:45:40 -0800
Subject: [PATCH] use cycle_t instead of u64 in struct time_interpolator

The 32bit and 64bit PARISC Linux kernels suffer from the problem that the
gettimeofday() call sometimes returns non-monotonic times.

The easiest way to fix this is to drop the PARISC-specific implementation
and switch over to the generic TIME_INTERPOLATION framework.

But in order to make it even compile on 32bit PARISC, the patch below, which
touches the generic Linux code, is mandatory.

More information and the full patch with the parisc-specific changes is included in this thread: http://lists.parisc-linux.org/pipermail/parisc-linux/2006-December/031003.html

As far as I could see, this patch does not change anything for the existing
architectures which use this framework (IA64 and SPARC64), since "cycles_t"
is defined there as unsigned 64bit-integer anyway (which then makes this
patch a no-change for them).

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <linux-arch@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/timex.h | 4 ++--
 kernel/timer.c        | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index db501dc23c2..9a24e500c31 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -255,10 +255,10 @@ struct time_interpolator {
 	u8 jitter;			/* if set compensate for fluctuations */
 	u32 nsec_per_cyc;		/* set by register_time_interpolator() */
 	void *addr;			/* address of counter or function */
-	u64 mask;			/* mask the valid bits of the counter */
+	cycles_t mask;			/* mask the valid bits of the counter */
 	unsigned long offset;		/* nsec offset at last update of interpolator */
 	u64 last_counter;		/* counter value in units of the counter at last update */
-	u64 last_cycle;			/* Last timer value if TIME_SOURCE_JITTER is set */
+	cycles_t last_cycle;		/* Last timer value if TIME_SOURCE_JITTER is set */
 	u64 frequency;			/* frequency in counts/second */
 	long drift;			/* drift in parts-per-million (or -1) */
 	unsigned long skips;		/* skips forward */
diff --git a/kernel/timer.c b/kernel/timer.c
index c2a8ccfc288..d38801a9586 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1624,7 +1624,7 @@ struct time_interpolator *time_interpolator __read_mostly;
 static struct time_interpolator *time_interpolator_list __read_mostly;
 static DEFINE_SPINLOCK(time_interpolator_lock);
 
-static inline u64 time_interpolator_get_cycles(unsigned int src)
+static inline cycles_t time_interpolator_get_cycles(unsigned int src)
 {
 	unsigned long (*x)(void);
 
@@ -1650,8 +1650,8 @@ static inline u64 time_interpolator_get_counter(int writelock)
 
 	if (time_interpolator->jitter)
 	{
-		u64 lcycle;
-		u64 now;
+		cycles_t lcycle;
+		cycles_t now;
 
 		do {
 			lcycle = time_interpolator->last_cycle;
-- 
cgit v1.2.3-70-g09d2


From cb799b8988e40a7871ae8e976248c33c562e3555 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 10 Feb 2007 01:45:51 -0800
Subject: [PATCH] sysctl warning fix

kernel/sysctl.c:2816: warning: 'sysctl_ipc_data' defined but not used

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7733ef58aac..84cab0ce44d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2767,12 +2767,14 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 {
 	return -ENOSYS;
 }
+#ifdef CONFIG_SYSVIPC
 static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
+#endif
 #endif /* CONFIG_SYSCTL_SYSCALL */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From b653d081c17e26101980c858a9808740533b78b4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 10 Feb 2007 01:45:54 -0800
Subject: [PATCH] proc: remove useless (and buggy) ->nlink settings

Bug: pnx8550 code creates directory but resets ->nlink to 1.

create_proc_entry() et al will correctly set ->nlink for you.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Corey Minyard <minyard@acm.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Greg KH <greg@kroah.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/philips/pnx8550/common/proc.c   | 7 +------
 arch/powerpc/kernel/proc_ppc64.c          | 1 -
 arch/powerpc/kernel/rtas_flash.c          | 1 -
 arch/powerpc/platforms/iseries/mf.c       | 4 ----
 arch/powerpc/platforms/pseries/reconfig.c | 1 -
 arch/um/drivers/ubd_kern.c                | 1 -
 drivers/char/ipmi/ipmi_msghandler.c       | 1 -
 drivers/ide/ide-proc.c                    | 1 -
 drivers/macintosh/via-pmu.c               | 1 -
 drivers/misc/hdpuftrs/hdpu_nexus.c        | 2 --
 drivers/parisc/led.c                      | 2 --
 drivers/s390/crypto/zcrypt_api.c          | 1 -
 drivers/usb/gadget/rndis.c                | 1 -
 kernel/irq/proc.c                         | 1 -
 kernel/profile.c                          | 1 -
 15 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'kernel')

diff --git a/arch/mips/philips/pnx8550/common/proc.c b/arch/mips/philips/pnx8550/common/proc.c
index 72a016767e0..3f097558ef1 100644
--- a/arch/mips/philips/pnx8550/common/proc.c
+++ b/arch/mips/philips/pnx8550/common/proc.c
@@ -79,10 +79,7 @@ static int pnx8550_proc_init( void )
 
 	// Create /proc/pnx8550
         pnx8550_dir = create_proc_entry("pnx8550", S_IFDIR|S_IRUGO, NULL);
-        if (pnx8550_dir){
-                pnx8550_dir->nlink = 1;
-        }
-        else {
+        if (!pnx8550_dir) {
                 printk(KERN_ERR "Can't create pnx8550 proc dir\n");
                 return -1;
         }
@@ -90,7 +87,6 @@ static int pnx8550_proc_init( void )
 	// Create /proc/pnx8550/timers
         pnx8550_timers = create_proc_entry("timers", S_IFREG|S_IRUGO, pnx8550_dir );
         if (pnx8550_timers){
-                pnx8550_timers->nlink = 1;
                 pnx8550_timers->read_proc = pnx8550_timers_read;
         }
         else {
@@ -100,7 +96,6 @@ static int pnx8550_proc_init( void )
 	// Create /proc/pnx8550/registers
         pnx8550_registers = create_proc_entry("registers", S_IFREG|S_IRUGO, pnx8550_dir );
         if (pnx8550_registers){
-                pnx8550_registers->nlink = 1;
                 pnx8550_registers->read_proc = pnx8550_registers_read;
         }
         else {
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
index dd7001cacf7..3d437c32546 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -71,7 +71,6 @@ static int __init proc_ppc64_init(void)
 	pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
 	if (!pde)
 		return 1;
-	pde->nlink = 1;
 	pde->data = vdso_data;
 	pde->size = PAGE_SIZE;
 	pde->proc_fops = &page_map_fops;
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 0c4fcd34bfe..65e4ebe8db3 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -708,7 +708,6 @@ static struct proc_dir_entry *create_flash_pde(const char *filename,
 
 	ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL);
 	if (ent != NULL) {
-		ent->nlink = 1;
 		ent->proc_fops = fops;
 		ent->owner = THIS_MODULE;
 	}
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index 1ad0e4aaad1..90d3d49f713 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -1253,7 +1253,6 @@ static int __init mf_proc_init(void)
 		ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf);
 		if (!ent)
 			return 1;
-		ent->nlink = 1;
 		ent->data = (void *)(long)i;
 		ent->read_proc = proc_mf_dump_cmdline;
 		ent->write_proc = proc_mf_change_cmdline;
@@ -1264,7 +1263,6 @@ static int __init mf_proc_init(void)
 		ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf);
 		if (!ent)
 			return 1;
-		ent->nlink = 1;
 		ent->data = (void *)(long)i;
 		ent->proc_fops = &proc_vmlinux_operations;
 	}
@@ -1272,7 +1270,6 @@ static int __init mf_proc_init(void)
 	ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
 	if (!ent)
 		return 1;
-	ent->nlink = 1;
 	ent->data = (void *)0;
 	ent->read_proc = proc_mf_dump_side;
 	ent->write_proc = proc_mf_change_side;
@@ -1280,7 +1277,6 @@ static int __init mf_proc_init(void)
 	ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
 	if (!ent)
 		return 1;
-	ent->nlink = 1;
 	ent->data = (void *)0;
 	ent->read_proc = proc_mf_dump_src;
 	ent->write_proc = proc_mf_change_src;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 4ad33e41b00..789a5e99aef 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -513,7 +513,6 @@ static int proc_ppc64_create_ofdt(void)
 
 	ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
 	if (ent) {
-		ent->nlink = 1;
 		ent->data = NULL;
 		ent->size = 0;
 		ent->proc_fops = &ofdt_fops;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index da0badcd755..f98d26e5138 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -235,7 +235,6 @@ static void make_ide_entries(char *dev_name)
 
 	ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
 	if(!ent) return;
-	ent->nlink = 1;
 	ent->data = NULL;
 	ent->read_proc = proc_ide_read_media;
 	ent->write_proc = NULL;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 230064ede08..3aff5e99b67 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1887,7 +1887,6 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name,
 		kfree(entry);
 		rv = -ENOMEM;
 	} else {
-		file->nlink = 1;
 		file->data = data;
 		file->read_proc = read_proc;
 		file->write_proc = write_proc;
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index aa049dab3d9..ad49bd823eb 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -413,7 +413,6 @@ void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void
 	while (p->name != NULL) {
 		ent = create_proc_entry(p->name, p->mode, dir);
 		if (!ent) return;
-		ent->nlink = 1;
 		ent->data = data;
 		ent->read_proc = p->read_proc;
 		ent->write_proc = p->write_proc;
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 8ca75e52f63..eb6653f69ce 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -516,7 +516,6 @@ static int __init via_pmu_dev_init(void)
 					proc_get_irqstats, NULL);
 		proc_pmu_options = create_proc_entry("options", 0600, proc_pmu_root);
 		if (proc_pmu_options) {
-			proc_pmu_options->nlink = 1;
 			proc_pmu_options->read_proc = proc_read_options;
 			proc_pmu_options->write_proc = proc_write_options;
 		}
diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c
index ea9d5f233c8..6a51e99a807 100644
--- a/drivers/misc/hdpuftrs/hdpu_nexus.c
+++ b/drivers/misc/hdpuftrs/hdpu_nexus.c
@@ -72,11 +72,9 @@ static int hdpu_nexus_probe(struct platform_device *pdev)
 		printk("Could not map slot id\n");
 	hdpu_slot_id = create_proc_entry("sky_slot_id", 0666, &proc_root);
 	hdpu_slot_id->read_proc = hdpu_slot_id_read;
-	hdpu_slot_id->nlink = 1;
 
 	hdpu_chassis_id = create_proc_entry("sky_chassis_id", 0666, &proc_root);
 	hdpu_chassis_id->read_proc = hdpu_chassis_id_read;
-	hdpu_chassis_id->nlink = 1;
 	return 0;
 }
 
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 8dac2ba82bb..9a731c101d1 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -252,7 +252,6 @@ static int __init led_create_procfs(void)
 	proc_pdc_root->owner = THIS_MODULE;
 	ent = create_proc_entry("led", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root);
 	if (!ent) return -1;
-	ent->nlink = 1;
 	ent->data = (void *)LED_NOLCD; /* LED */
 	ent->read_proc = led_proc_read;
 	ent->write_proc = led_proc_write;
@@ -262,7 +261,6 @@ static int __init led_create_procfs(void)
 	{
 		ent = create_proc_entry("lcd", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root);
 		if (!ent) return -1;
-		ent->nlink = 1;
 		ent->data = (void *)LED_HASLCD; /* LCD */
 		ent->read_proc = led_proc_read;
 		ent->write_proc = led_proc_write;
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index b9e59bc9435..2c785148d21 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1063,7 +1063,6 @@ int __init zcrypt_api_init(void)
 		rc = -ENOMEM;
 		goto out_misc;
 	}
-	zcrypt_entry->nlink = 1;
 	zcrypt_entry->data = NULL;
 	zcrypt_entry->read_proc = zcrypt_status_read;
 	zcrypt_entry->write_proc = zcrypt_status_write;
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index 408c3380d60..6ec8cf1a3cc 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -1419,7 +1419,6 @@ int __devinit rndis_init (void)
 			return -EIO;
 		}
 
-		rndis_connect_state [i]->nlink = 1;
 		rndis_connect_state [i]->write_proc = rndis_proc_write;
 		rndis_connect_state [i]->read_proc = rndis_proc_read;
 		rndis_connect_state [i]->data = (void *)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 61f5c717a8f..6d3be06e8ce 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -136,7 +136,6 @@ void register_irq_proc(unsigned int irq)
 		entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
 
 		if (entry) {
-			entry->nlink = 1;
 			entry->data = (void *)(long)irq;
 			entry->read_proc = irq_affinity_read_proc;
 			entry->write_proc = irq_affinity_write_proc;
diff --git a/kernel/profile.c b/kernel/profile.c
index d6579d51106..9bfadb248dd 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -449,7 +449,6 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
 	/* create /proc/irq/prof_cpu_mask */
 	if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
 		return;
-	entry->nlink = 1;
 	entry->data = (void *)&prof_cpu_mask;
 	entry->read_proc = prof_cpu_mask_read_proc;
 	entry->write_proc = prof_cpu_mask_write_proc;
-- 
cgit v1.2.3-70-g09d2


From 72fd4a35a824331d7a0f4168d7576502d95d34b3 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sat, 10 Feb 2007 01:45:59 -0800
Subject: [PATCH] Numerous fixes to kernel-doc info in source files.

A variety of (mostly) innocuous fixes to the embedded kernel-doc content in
source files, including:

  * make multi-line initial descriptions single line
  * denote some function names, constants and structs as such
  * change erroneous opening '/*' to '/**' in a few places
  * reword some text for clarity

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-i386/atomic.h |  4 ++--
 include/asm-i386/bitops.h |  4 ++--
 include/linux/init.h      |  2 +-
 include/linux/kfifo.h     |  6 +++---
 include/linux/ktime.h     |  6 +++---
 include/linux/list.h      | 11 ++++++-----
 ipc/util.c                | 21 ++++++++++-----------
 kernel/exit.c             |  3 +--
 kernel/hrtimer.c          |  6 +++---
 kernel/kfifo.c            | 10 +++++-----
 kernel/kthread.c          |  6 +++---
 kernel/printk.c           |  2 +-
 kernel/relay.c            | 12 ++++++------
 kernel/sched.c            |  9 ++++-----
 kernel/signal.c           |  2 +-
 kernel/sys.c              | 10 +++++-----
 kernel/timer.c            | 20 ++++++++++----------
 kernel/workqueue.c        |  6 ++----
 lib/bitmap.c              |  8 ++++----
 lib/cmdline.c             |  8 ++++----
 lib/idr.c                 |  4 ++--
 lib/kobject.c             |  5 +++--
 lib/sha1.c                |  9 ++++-----
 lib/sort.c                |  2 +-
 lib/string.c              |  8 +++-----
 lib/textsearch.c          |  2 +-
 lib/vsprintf.c            | 12 ++++++------
 mm/filemap.c              |  4 ++--
 mm/memory.c               |  4 +---
 mm/mempool.c              |  6 +++---
 mm/page-writeback.c       |  5 +----
 mm/slab.c                 |  2 +-
 mm/vmalloc.c              |  2 +-
 33 files changed, 105 insertions(+), 116 deletions(-)

(limited to 'kernel')

diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index c57441bb290..4dd27233136 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -211,12 +211,12 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * atomic_add_unless - add unless the number is a given value
+ * atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
  *
- * Atomically adds @a to @v, so long as it was not @u.
+ * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns non-zero if @v was not @u, and zero otherwise.
  */
 #define atomic_add_unless(v, a, u)				\
diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h
index 1c780fa1e76..273b5062935 100644
--- a/include/asm-i386/bitops.h
+++ b/include/asm-i386/bitops.h
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
  *
  * This is defined the same way as
  * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * differs in spirit from the above ffz() (man ffs).
  */
 static inline int ffs(int x)
 {
@@ -388,7 +388,7 @@ static inline int ffs(int x)
  * fls - find last bit set
  * @x: the word to search
  *
- * This is defined the same way as ffs.
+ * This is defined the same way as ffs().
  */
 static inline int fls(int x)
 {
diff --git a/include/linux/init.h b/include/linux/init.h
index 5a593a1dec1..c65f5107d51 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -172,7 +172,7 @@ void __init parse_early_param(void);
  * module_init() - driver initialization entry point
  * @x: function to be run at kernel boot time or module insertion
  * 
- * module_init() will either be called during do_initcalls (if
+ * module_init() will either be called during do_initcalls() (if
  * builtin) or at module insertion time (if a module).  There can only
  * be one per module.
  */
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 48eccd865bd..404f4464cb1 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -74,7 +74,7 @@ static inline void kfifo_reset(struct kfifo *fifo)
  * @buffer: the data to be added.
  * @len: the length of the data to be added.
  *
- * This function copies at most 'len' bytes from the 'buffer' into
+ * This function copies at most @len bytes from the @buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
@@ -99,8 +99,8 @@ static inline unsigned int kfifo_put(struct kfifo *fifo,
  * @buffer: where the data must be copied.
  * @len: the size of the destination buffer.
  *
- * This function copies at most 'len' bytes from the FIFO into the
- * 'buffer' and returns the number of copied bytes.
+ * This function copies at most @len bytes from the FIFO into the
+ * @buffer and returns the number of copied bytes.
  */
 static inline unsigned int kfifo_get(struct kfifo *fifo,
 				     unsigned char *buffer, unsigned int len)
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 611f17f79ee..7444a632623 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -163,7 +163,7 @@ static inline ktime_t ktime_sub(const ktime_t lhs, const ktime_t rhs)
  * @add1:	addend1
  * @add2:	addend2
  *
- * Returns the sum of addend1 and addend2
+ * Returns the sum of @add1 and @add2.
  */
 static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2)
 {
@@ -189,7 +189,7 @@ static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2)
  * @kt:		addend
  * @nsec:	the scalar nsec value to add
  *
- * Returns the sum of kt and nsec in ktime_t format
+ * Returns the sum of @kt and @nsec in ktime_t format
  */
 extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec);
 
@@ -246,7 +246,7 @@ static inline struct timeval ktime_to_timeval(const ktime_t kt)
  * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds
  * @kt:		the ktime_t variable to convert
  *
- * Returns the scalar nanoseconds representation of kt
+ * Returns the scalar nanoseconds representation of @kt
  */
 static inline s64 ktime_to_ns(const ktime_t kt)
 {
diff --git a/include/linux/list.h b/include/linux/list.h
index cdc96559e5a..f9d71eab05e 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -161,7 +161,7 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
+ * Note: list_empty() on entry does not return true after this, the entry is
  * in an undefined state.
  */
 #ifndef CONFIG_DEBUG_LIST
@@ -179,7 +179,7 @@ extern void list_del(struct list_head *entry);
  * list_del_rcu - deletes entry from list without re-initialization
  * @entry: the element to delete from the list.
  *
- * Note: list_empty on entry does not return true after this,
+ * Note: list_empty() on entry does not return true after this,
  * the entry is in an undefined state. It is useful for RCU based
  * lockfree traversal.
  *
@@ -209,7 +209,8 @@ static inline void list_del_rcu(struct list_head *entry)
  * list_replace - replace old entry by new one
  * @old : the element to be replaced
  * @new : the new element to insert
- * Note: if 'old' was empty, it will be overwritten.
+ *
+ * If @old was empty, it will be overwritten.
  */
 static inline void list_replace(struct list_head *old,
 				struct list_head *new)
@@ -488,12 +489,12 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	     pos = list_entry(pos->member.prev, typeof(*pos), member))
 
 /**
- * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
  * @pos:	the type * to use as a start point
  * @head:	the head of the list
  * @member:	the name of the list_struct within the struct.
  *
- * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
  */
 #define list_prepare_entry(pos, head, member) \
 	((pos) ? : list_entry(head, typeof(*pos), member))
diff --git a/ipc/util.c b/ipc/util.c
index a9b7a227b8d..0c97cb74616 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -150,7 +150,7 @@ void free_ipc_ns(struct kref *kref)
  *	ipc_init	-	initialise IPC subsystem
  *
  *	The various system5 IPC resources (semaphores, messages and shared
- *	memory are initialised
+ *	memory) are initialised
  */
  
 static int __init ipc_init(void)
@@ -207,8 +207,7 @@ void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
 #ifdef CONFIG_PROC_FS
 static struct file_operations sysvipc_proc_fops;
 /**
- *	ipc_init_proc_interface	-  Create a proc interface for sysipc types
- *				   using a seq_file interface.
+ *	ipc_init_proc_interface	-  Create a proc interface for sysvipc types using a seq_file interface.
  *	@path: Path in procfs
  *	@header: Banner to be printed at the beginning of the file.
  *	@ids: ipc id table to iterate.
@@ -417,7 +416,7 @@ void* ipc_alloc(int size)
  *	@ptr: pointer returned by ipc_alloc
  *	@size: size of block
  *
- *	Free a block created with ipc_alloc. The caller must know the size
+ *	Free a block created with ipc_alloc(). The caller must know the size
  *	used in the allocation call.
  */
 
@@ -524,7 +523,7 @@ static void ipc_do_vfree(struct work_struct *work)
  * @head: RCU callback structure for queued work
  * 
  * Since RCU callback function is called in bh,
- * we need to defer the vfree to schedule_work
+ * we need to defer the vfree to schedule_work().
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
@@ -541,7 +540,7 @@ static void ipc_schedule_free(struct rcu_head *head)
  * ipc_immediate_free - free ipc + rcu space
  * @head: RCU callback structure that contains pointer to be freed
  *
- * Free from the RCU callback context
+ * Free from the RCU callback context.
  */
 static void ipc_immediate_free(struct rcu_head *head)
 {
@@ -603,8 +602,8 @@ int ipcperms (struct kern_ipc_perm *ipcp, short flag)
  *	@in: kernel permissions
  *	@out: new style IPC permissions
  *
- *	Turn the kernel object 'in' into a set of permissions descriptions
- *	for returning to userspace (out).
+ *	Turn the kernel object @in into a set of permissions descriptions
+ *	for returning to userspace (@out).
  */
  
 
@@ -624,8 +623,8 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
  *	@in: new style IPC permissions
  *	@out: old style IPC permissions
  *
- *	Turn the new style permissions object in into a compatibility
- *	object and store it into the 'out' pointer.
+ *	Turn the new style permissions object @in into a compatibility
+ *	object and store it into the @out pointer.
  */
  
 void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
@@ -722,7 +721,7 @@ int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
  *	@cmd: pointer to command
  *
  *	Return IPC_64 for new style IPC and IPC_OLD for old style IPC. 
- *	The cmd value is turned from an encoding command and version into
+ *	The @cmd value is turned from an encoding command and version into
  *	just the command code.
  */
  
diff --git a/kernel/exit.c b/kernel/exit.c
index fec12eb1247..bc71fdfcd8a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -257,8 +257,7 @@ static int has_stopped_jobs(int pgrp)
 }
 
 /**
- * reparent_to_init - Reparent the calling kernel thread to the init task
- * of the pid space that the thread belongs to.
+ * reparent_to_init - Reparent the calling kernel thread to the init task of the pid space that the thread belongs to.
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to init so that
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d0ba190dfeb..f44e499e8fc 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -102,7 +102,7 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) =
  *
  * The function calculates the monotonic clock from the realtime
  * clock and the wall_to_monotonic offset and stores the result
- * in normalized timespec format in the variable pointed to by ts.
+ * in normalized timespec format in the variable pointed to by @ts.
  */
 void ktime_get_ts(struct timespec *ts)
 {
@@ -583,8 +583,8 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
  * @which_clock: which clock to query
  * @tp:		 pointer to timespec variable to store the resolution
  *
- * Store the resolution of the clock selected by which_clock in the
- * variable pointed to by tp.
+ * Store the resolution of the clock selected by @which_clock in the
+ * variable pointed to by @tp.
  */
 int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 {
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 5d1d907378a..cee419143fd 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -32,8 +32,8 @@
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
  * @lock: the lock to be used to protect the fifo buffer
  *
- * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the
- * struct kfifo with kfree().
+ * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
+ * &struct kfifo with kfree().
  */
 struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
 			 gfp_t gfp_mask, spinlock_t *lock)
@@ -108,7 +108,7 @@ EXPORT_SYMBOL(kfifo_free);
  * @buffer: the data to be added.
  * @len: the length of the data to be added.
  *
- * This function copies at most 'len' bytes from the 'buffer' into
+ * This function copies at most @len bytes from the @buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  *
@@ -155,8 +155,8 @@ EXPORT_SYMBOL(__kfifo_put);
  * @buffer: where the data must be copied.
  * @len: the size of the destination buffer.
  *
- * This function copies at most 'len' bytes from the FIFO into the
- * 'buffer' and returns the number of copied bytes.
+ * This function copies at most @len bytes from the FIFO into the
+ * @buffer and returns the number of copied bytes.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1db8c72d0d3..87c50ccd1d4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -50,7 +50,7 @@ static struct kthread_stop_info kthread_stop_info;
 /**
  * kthread_should_stop - should this kthread return now?
  *
- * When someone calls kthread_stop on your kthread, it will be woken
+ * When someone calls kthread_stop() on your kthread, it will be woken
  * and this will return true.  You should then return, and your return
  * value will be passed through to kthread_stop().
  */
@@ -143,7 +143,7 @@ static void keventd_create_kthread(struct work_struct *work)
  * it.  See also kthread_run(), kthread_create_on_cpu().
  *
  * When woken, the thread will run @threadfn() with @data as its
- * argument. @threadfn can either call do_exit() directly if it is a
+ * argument. @threadfn() can either call do_exit() directly if it is a
  * standalone thread for which noone will call kthread_stop(), or
  * return when 'kthread_should_stop()' is true (which means
  * kthread_stop() has been called).  The return value should be zero
@@ -192,7 +192,7 @@ EXPORT_SYMBOL(kthread_create);
  *
  * Description: This function is equivalent to set_cpus_allowed(),
  * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create().
+ * stopped (i.e., just returned from kthread_create()).
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
diff --git a/kernel/printk.c b/kernel/printk.c
index c770e1a4e88..3e79e18dce3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -483,7 +483,7 @@ static int have_callable_console(void)
  * printk - print a kernel message
  * @fmt: format string
  *
- * This is printk.  It can be called from any context.  We want it to work.
+ * This is printk().  It can be called from any context.  We want it to work.
  *
  * We try to grab the console_sem.  If we succeed, it's easy - we log the output and
  * call the console drivers.  If we fail to get the semaphore we place the output
diff --git a/kernel/relay.c b/kernel/relay.c
index ef923f6de2e..ef8a935710a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -328,7 +328,7 @@ static void wakeup_readers(struct work_struct *work)
  *	@buf: the channel buffer
  *	@init: 1 if this is a first-time initialization
  *
- *	See relay_reset for description of effect.
+ *	See relay_reset() for description of effect.
  */
 static void __relay_reset(struct rchan_buf *buf, unsigned int init)
 {
@@ -364,7 +364,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
  *	and restarting the channel in its initial state.  The buffers
  *	are not freed, so any mappings are still in effect.
  *
- *	NOTE: Care should be taken that the channel isn't actually
+ *	NOTE. Care should be taken that the channel isn't actually
  *	being used by anything when this call is made.
  */
 void relay_reset(struct rchan *chan)
@@ -528,7 +528,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
  *	Creates a channel buffer for each cpu using the sizes and
  *	attributes specified.  The created channel buffer files
  *	will be named base_filename0...base_filenameN-1.  File
- *	permissions will be S_IRUSR.
+ *	permissions will be %S_IRUSR.
  */
 struct rchan *relay_open(const char *base_filename,
 			 struct dentry *parent,
@@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(relay_switch_subbuf);
  *	subbufs_consumed should be the number of sub-buffers newly consumed,
  *	not the total consumed.
  *
- *	NOTE: Kernel clients don't need to call this function if the channel
+ *	NOTE. Kernel clients don't need to call this function if the channel
  *	mode is 'overwrite'.
  */
 void relay_subbufs_consumed(struct rchan *chan,
@@ -749,7 +749,7 @@ static int relay_file_open(struct inode *inode, struct file *filp)
  *	@filp: the file
  *	@vma: the vma describing what to map
  *
- *	Calls upon relay_mmap_buf to map the file into user space.
+ *	Calls upon relay_mmap_buf() to map the file into user space.
  */
 static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
@@ -891,7 +891,7 @@ static size_t relay_file_read_subbuf_avail(size_t read_pos,
  *	@read_pos: file read position
  *	@buf: relay channel buffer
  *
- *	If the read_pos is in the middle of padding, return the
+ *	If the @read_pos is in the middle of padding, return the
  *	position of the first actually available byte, otherwise
  *	return the original value.
  */
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cd4ee769e2..1fd67e16cd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4203,13 +4203,12 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
 }
 
 /**
- * sched_setscheduler - change the scheduling policy and/or RT priority of
- * a thread.
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
  * @p: the task in question.
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  *
- * NOTE: the task may be already dead
+ * NOTE that the task may be already dead.
  */
 int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
@@ -4577,7 +4576,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
 /**
  * sys_sched_yield - yield the current processor to other threads.
  *
- * this function yields the current CPU by moving the calling thread
+ * This function yields the current CPU by moving the calling thread
  * to the expired array. If there are no other threads running on this
  * CPU then this function will return.
  */
@@ -4704,7 +4703,7 @@ EXPORT_SYMBOL(cond_resched_softirq);
 /**
  * yield - yield the current processor to other threads.
  *
- * this is a shortcut for kernel-space yielding - it marks the
+ * This is a shortcut for kernel-space yielding - it marks the
  * thread runnable and calls sys_sched_yield().
  */
 void __sched yield(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index ea4632bd40a..228fdb5c01d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2282,7 +2282,7 @@ static int do_tkill(int tgid, int pid, int sig)
  *  @pid: the PID of the thread
  *  @sig: signal to be sent
  *
- *  This syscall also checks the tgid and returns -ESRCH even if the PID
+ *  This syscall also checks the @tgid and returns -ESRCH even if the PID
  *  exists but it's not belonging to the target process anymore. This
  *  method solves the problem of threads exiting and PIDs getting reused.
  */
diff --git a/kernel/sys.c b/kernel/sys.c
index 6e2101dec0f..e1024383314 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
  *	This routine uses RCU to synchronize with changes to the chain.
  *
  *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain
+ *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
  *	will return immediately, with the return value of
  *	the notifier function which halted execution.
  *	Otherwise the return value is the return value
@@ -313,7 +313,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
  *	run in a process context, so they are allowed to block.
  *
  *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain
+ *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
  *	will return immediately, with the return value of
  *	the notifier function which halted execution.
  *	Otherwise the return value is the return value
@@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
  *	All locking must be provided by the caller.
  *
  *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain
+ *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
  *	will return immediately, with the return value of
  *	the notifier function which halted execution.
  *	Otherwise the return value is the return value
@@ -487,7 +487,7 @@ EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
  *	run in a process context, so they are allowed to block.
  *
  *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain
+ *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
  *	will return immediately, with the return value of
  *	the notifier function which halted execution.
  *	Otherwise the return value is the return value
@@ -538,7 +538,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
  *	Registers a function with the list of functions
  *	to be called at reboot time.
  *
- *	Currently always returns zero, as blocking_notifier_chain_register
+ *	Currently always returns zero, as blocking_notifier_chain_register()
  *	always returns zero.
  */
  
diff --git a/kernel/timer.c b/kernel/timer.c
index d38801a9586..31ab627df8a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -85,7 +85,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
  * @j: the time in (absolute) jiffies that should be rounded
  * @cpu: the processor number on which the timeout will happen
  *
- * __round_jiffies rounds an absolute time in the future (in jiffies)
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -98,7 +98,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
  * processors firing at the exact same time, which could lead
  * to lock contention or spurious cache line bouncing.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long __round_jiffies(unsigned long j, int cpu)
 {
@@ -142,7 +142,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
  * @j: the time in (relative) jiffies that should be rounded
  * @cpu: the processor number on which the timeout will happen
  *
- * __round_jiffies_relative rounds a time delta  in the future (in jiffies)
+ * __round_jiffies_relative() rounds a time delta  in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
  * processors firing at the exact same time, which could lead
  * to lock contention or spurious cache line bouncing.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long __round_jiffies_relative(unsigned long j, int cpu)
 {
@@ -173,7 +173,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
  * round_jiffies - function to round jiffies to a full second
  * @j: the time in (absolute) jiffies that should be rounded
  *
- * round_jiffies rounds an absolute time in the future (in jiffies)
+ * round_jiffies() rounds an absolute time in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -182,7 +182,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
  * at the same time, rather than at various times spread out. The goal
  * of this is to have the CPU wake up less, which saves power.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long round_jiffies(unsigned long j)
 {
@@ -194,7 +194,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
  * round_jiffies_relative - function to round jiffies to a full second
  * @j: the time in (relative) jiffies that should be rounded
  *
- * round_jiffies_relative rounds a time delta  in the future (in jiffies)
+ * round_jiffies_relative() rounds a time delta  in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
  * at the same time, rather than at various times spread out. The goal
  * of this is to have the CPU wake up less, which saves power.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long round_jiffies_relative(unsigned long j)
 {
@@ -387,7 +387,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
  * @timer: the timer to be modified
  * @expires: new timeout in jiffies
  *
- * mod_timer is a more efficient way to update the expire field of an
+ * mod_timer() is a more efficient way to update the expire field of an
  * active timer (if the timer is inactive it will be activated)
  *
  * mod_timer(timer, expires) is equivalent to:
@@ -490,7 +490,7 @@ out:
  * the timer it also makes sure the handler has finished executing on other
  * CPUs.
  *
- * Synchronization rules: callers must prevent restarting of the timer,
+ * Synchronization rules: Callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
  * interrupt contexts. The caller must not hold locks which would prevent
  * completion of the timer's handler. The timer's handler must not call
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a3da07c5af2..020d1fff57d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -656,8 +656,7 @@ void flush_scheduled_work(void)
 EXPORT_SYMBOL(flush_scheduled_work);
 
 /**
- * cancel_rearming_delayed_workqueue - reliably kill off a delayed
- *			work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work.
  * @wq:   the controlling workqueue structure
  * @dwork: the delayed work struct
  */
@@ -670,8 +669,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 EXPORT_SYMBOL(cancel_rearming_delayed_workqueue);
 
 /**
- * cancel_rearming_delayed_work - reliably kill off a delayed keventd
- *			work whose handler rearms the delayed work.
+ * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work.
  * @dwork: the delayed work struct
  */
 void cancel_rearming_delayed_work(struct delayed_work *dwork)
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 037fa9aa2ed..ee6e58fce8f 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -95,7 +95,7 @@ void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
-/*
+/**
  * __bitmap_shift_right - logical right shift of the bits in a bitmap
  *   @dst - destination bitmap
  *   @src - source bitmap
@@ -139,7 +139,7 @@ void __bitmap_shift_right(unsigned long *dst,
 EXPORT_SYMBOL(__bitmap_shift_right);
 
 
-/*
+/**
  * __bitmap_shift_left - logical left shift of the bits in a bitmap
  *   @dst - destination bitmap
  *   @src - source bitmap
@@ -529,7 +529,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 }
 EXPORT_SYMBOL(bitmap_parselist);
 
-/*
+/**
  * bitmap_pos_to_ord(buf, pos, bits)
  *	@buf: pointer to a bitmap
  *	@pos: a bit position in @buf (0 <= @pos < @bits)
@@ -804,7 +804,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
  *	@pos: beginning of bit region to release
  *	@order: region size (log base 2 of number of bits) to release
  *
- * This is the complement to __bitmap_find_free_region and releases
+ * This is the complement to __bitmap_find_free_region() and releases
  * the found region (by clearing it in the bitmap).
  *
  * No return value.
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 8a5b5303bd4..f596c08d213 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -43,10 +43,10 @@ static int get_range(char **str, int *pint)
  *	comma as well.
  *
  *	Return values:
- *	0 : no int in string
- *	1 : int found, no subsequent comma
- *	2 : int found including a subsequent comma
- *	3 : hyphen found to denote a range
+ *	0 - no int in string
+ *	1 - int found, no subsequent comma
+ *	2 - int found including a subsequent comma
+ *	3 - hyphen found to denote a range
  */
 
 int get_option (char **str, int *pint)
diff --git a/lib/idr.c b/lib/idr.c
index 71853531d3b..305117ca2d4 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -329,8 +329,8 @@ static void sub_remove(struct idr *idp, int shift, int id)
 
 /**
  * idr_remove - remove the given id and free it's slot
- * idp: idr handle
- * id: uniqueue key
+ * @idp: idr handle
+ * @id: unique key
  */
 void idr_remove(struct idr *idp, int id)
 {
diff --git a/lib/kobject.c b/lib/kobject.c
index c2917ffe8bf..2782f49e906 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -97,11 +97,12 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length)
 }
 
 /**
- * kobject_get_path - generate and return the path associated with a given kobj
- * and kset pair.  The result must be freed by the caller with kfree().
+ * kobject_get_path - generate and return the path associated with a given kobj and kset pair.
  *
  * @kobj:	kobject in question, with which to build the path
  * @gfp_mask:	the allocation type used to allocate the path
+ *
+ * The result must be freed by the caller with kfree().
  */
 char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
 {
diff --git a/lib/sha1.c b/lib/sha1.c
index 1cdabe3065f..4c45fd50e91 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -20,8 +20,8 @@
 #define K3  0x8F1BBCDCL			/* Rounds 40-59: sqrt(5) * 2^30 */
 #define K4  0xCA62C1D6L			/* Rounds 60-79: sqrt(10) * 2^30 */
 
-/*
- * sha_transform: single block SHA1 transform
+/**
+ * sha_transform - single block SHA1 transform
  *
  * @digest: 160 bit digest to update
  * @data:   512 bits of data to hash
@@ -80,9 +80,8 @@ void sha_transform(__u32 *digest, const char *in, __u32 *W)
 }
 EXPORT_SYMBOL(sha_transform);
 
-/*
- * sha_init: initialize the vectors for a SHA1 digest
- *
+/**
+ * sha_init - initialize the vectors for a SHA1 digest
  * @buf: vector to initialize
  */
 void sha_init(__u32 *buf)
diff --git a/lib/sort.c b/lib/sort.c
index 488788b341c..961567894d1 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -27,7 +27,7 @@ static void generic_swap(void *a, void *b, int size)
 	} while (--size > 0);
 }
 
-/*
+/**
  * sort - sort an array of elements
  * @base: pointer to data to sort
  * @num: number of elements
diff --git a/lib/string.c b/lib/string.c
index a485d75962a..bab440fb0df 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -160,7 +160,7 @@ EXPORT_SYMBOL(strcat);
  * @src: The string to append to it
  * @count: The maximum numbers of bytes to copy
  *
- * Note that in contrast to strncpy, strncat ensures the result is
+ * Note that in contrast to strncpy(), strncat() ensures the result is
  * terminated.
  */
 char *strncat(char *dest, const char *src, size_t count)
@@ -366,8 +366,7 @@ EXPORT_SYMBOL(strnlen);
 
 #ifndef __HAVE_ARCH_STRSPN
 /**
- * strspn - Calculate the length of the initial substring of @s which only
- * 	contain letters in @accept
+ * strspn - Calculate the length of the initial substring of @s which only contain letters in @accept
  * @s: The string to be searched
  * @accept: The string to search for
  */
@@ -394,8 +393,7 @@ EXPORT_SYMBOL(strspn);
 
 #ifndef __HAVE_ARCH_STRCSPN
 /**
- * strcspn - Calculate the length of the initial substring of @s which does
- * 	not contain letters in @reject
+ * strcspn - Calculate the length of the initial substring of @s which does not contain letters in @reject
  * @s: The string to be searched
  * @reject: The string to avoid
  */
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 98bcadc0118..9e2a002c5b5 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -218,7 +218,7 @@ static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
  * Call textsearch_next() to retrieve subsequent matches.
  *
  * Returns the position of first occurrence of the pattern or
- * UINT_MAX if no occurrence was found.
+ * %UINT_MAX if no occurrence was found.
  */ 
 unsigned int textsearch_find_continuous(struct ts_config *conf,
 					struct ts_state *state,
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index bed7229378f..44f0e339a94 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -247,12 +247,12 @@ static char * number(char * buf, char * end, unsigned long long num, int base, i
  * be generated for the given input, excluding the trailing
  * '\0', as per ISO C99. If you want to have the exact
  * number of characters written into @buf as return value
- * (not including the trailing '\0'), use vscnprintf. If the
+ * (not including the trailing '\0'), use vscnprintf(). If the
  * return is greater than or equal to @size, the resulting
  * string is truncated.
  *
  * Call this function if you are already dealing with a va_list.
- * You probably want snprintf instead.
+ * You probably want snprintf() instead.
  */
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
@@ -509,7 +509,7 @@ EXPORT_SYMBOL(vsnprintf);
  * returns 0.
  *
  * Call this function if you are already dealing with a va_list.
- * You probably want scnprintf instead.
+ * You probably want scnprintf() instead.
  */
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
@@ -577,11 +577,11 @@ EXPORT_SYMBOL(scnprintf);
  * @args: Arguments for the format string
  *
  * The function returns the number of characters written
- * into @buf. Use vsnprintf or vscnprintf in order to avoid
+ * into @buf. Use vsnprintf() or vscnprintf() in order to avoid
  * buffer overflows.
  *
  * Call this function if you are already dealing with a va_list.
- * You probably want sprintf instead.
+ * You probably want sprintf() instead.
  */
 int vsprintf(char *buf, const char *fmt, va_list args)
 {
@@ -597,7 +597,7 @@ EXPORT_SYMBOL(vsprintf);
  * @...: Arguments for the format string
  *
  * The function returns the number of characters written
- * into @buf. Use snprintf or scnprintf in order to avoid
+ * into @buf. Use snprintf() or scnprintf() in order to avoid
  * buffer overflows.
  */
 int sprintf(char * buf, const char *fmt, ...)
diff --git a/mm/filemap.c b/mm/filemap.c
index f30ef28405d..00414849a86 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range);
  * @pos:	beginning offset in pages to write
  * @count:	number of bytes to write
  *
- * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
+ * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
  * as it forces O_SYNC writers to different parts of the same file
  * to be serialised right until io completion.
  */
@@ -784,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
  * @mapping: target address_space
  * @index: the page index
  *
- * Same as grab_cache_page, but do not wait if the page is unavailable.
+ * Same as grab_cache_page(), but do not wait if the page is unavailable.
  * This is intended for speculative data generators, where the data can
  * be regenerated if the page couldn't be grabbed.  This routine should
  * be safe to call while holding the lock for another page.
diff --git a/mm/memory.c b/mm/memory.c
index 0e6a402d86b..072c1135ad3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1775,9 +1775,7 @@ restart:
 }
 
 /**
- * unmap_mapping_range - unmap the portion of all mmaps
- * in the specified address_space corresponding to the specified
- * page range in the underlying file.
+ * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
  * @mapping: the address space containing mmaps to be unmapped.
  * @holebegin: byte in first page to unmap, relative to the start of
  * the underlying file.  This will be rounded down to a PAGE_SIZE
diff --git a/mm/mempool.c b/mm/mempool.c
index ccd8cb8cd41..cc1ca86dfc2 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool)
  * @pool_data: optional private data available to the user-defined functions.
  *
  * this function creates and allocates a guaranteed size, preallocated
- * memory pool. The pool can be used from the mempool_alloc and mempool_free
+ * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
  * functions. This function might sleep. Both the alloc_fn() and the free_fn()
- * functions might sleep - as long as the mempool_alloc function is not called
+ * functions might sleep - as long as the mempool_alloc() function is not called
  * from IRQ contexts.
  */
 mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy);
  *             mempool_create().
  * @gfp_mask:  the usual allocation bitmask.
  *
- * this function only sleeps if the alloc_fn function sleeps or
+ * this function only sleeps if the alloc_fn() function sleeps or
  * returns NULL. Note that due to preallocation, this function
  * *never* fails when called from process contexts. (it might
  * fail if called from an IRQ context.)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 438833cbbca..fd96a555e50 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -549,9 +549,7 @@ void __init page_writeback_init(void)
 }
 
 /**
- * generic_writepages - walk the list of dirty pages of the given
- *                      address space and writepage() all of them.
- *
+ * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
  *
@@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 /**
  * write_one_page - write out a single page and optionally wait on I/O
- *
  * @page: the page to write
  * @wait: if true, wait on writeout
  *
diff --git a/mm/slab.c b/mm/slab.c
index 196df70eb8c..70784b848b6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2520,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * kmem_cache_destroy - delete a cache
  * @cachep: the cache to destroy
  *
- * Remove a struct kmem_cache object from the slab cache.
+ * Remove a &struct kmem_cache object from the slab cache.
  *
  * It is expected this function will be called by a module when it is
  * unloaded.  This will remove the cache completely, and avoid a duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86897ee792d..9eef486da90 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -699,7 +699,7 @@ finished:
  *	that it is big enough to cover the vma. Will return failure if
  *	that criteria isn't met.
  *
- *	Similar to remap_pfn_range (see mm/memory.c)
+ *	Similar to remap_pfn_range() (see mm/memory.c)
  */
 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 						unsigned long pgoff)
-- 
cgit v1.2.3-70-g09d2


From d4d23add3abcd18d8021b99f230df608ccb2f007 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@parisc-linux.org>
Date: Sat, 10 Feb 2007 01:46:00 -0800
Subject: [PATCH] Common compat_sys_sysinfo

I noticed that almost all architectures implemented exactly the same
sys32_sysinfo...  except parisc, where a bug was to be found in handling of
the uptime.  So let's remove a whole whack of code for fun and profit.
Cribbed compat_sys_sysinfo from x86_64's implementation, since I figured it
would be the best tested.

This patch incorporates Arnd's suggestion of not using set_fs/get_fs, but
instead extracting out the common code from sys_sysinfo.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/ia32_entry.S        |  2 +-
 arch/ia64/ia32/sys_ia32.c          | 68 --------------------------------------
 arch/mips/kernel/linux32.c         | 44 ------------------------
 arch/mips/kernel/scall64-n32.S     |  2 +-
 arch/mips/kernel/scall64-o32.S     |  2 +-
 arch/parisc/kernel/sys_parisc32.c  | 64 -----------------------------------
 arch/parisc/kernel/syscall_table.S |  2 +-
 arch/powerpc/kernel/sys_ppc32.c    | 67 -------------------------------------
 arch/s390/kernel/compat_linux.c    | 45 -------------------------
 arch/s390/kernel/compat_wrapper.S  |  6 ++--
 arch/s390/kernel/syscalls.S        |  2 +-
 arch/sparc64/kernel/sys_sparc32.c  | 64 -----------------------------------
 arch/sparc64/kernel/systbls.S      |  2 +-
 arch/x86_64/ia32/ia32entry.S       |  2 +-
 arch/x86_64/ia32/sys_ia32.c        | 66 ------------------------------------
 include/linux/kernel.h             |  3 ++
 kernel/compat.c                    | 66 ++++++++++++++++++++++++++++++++++++
 kernel/timer.c                     | 58 ++++++++++++++++++--------------
 18 files changed, 112 insertions(+), 453 deletions(-)

(limited to 'kernel')

diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index a32cd59b81e..687e5fdc968 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -326,7 +326,7 @@ ia32_syscall_table:
 	data8 sys_ni_syscall
 	data8 compat_sys_wait4
 	data8 sys_swapoff	  /* 115 */
-	data8 sys32_sysinfo
+	data8 compat_sys_sysinfo
 	data8 sys32_ipc
 	data8 sys_fsync
 	data8 sys32_sigreturn
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 957681c39ad..d430d36ae49 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -2209,74 +2209,6 @@ sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf)
 	return ret;
 }
 
-struct sysinfo32 {
-	s32 uptime;
-	u32 loads[3];
-	u32 totalram;
-	u32 freeram;
-	u32 sharedram;
-	u32 bufferram;
-	u32 totalswap;
-	u32 freeswap;
-	u16 procs;
-	u16 pad;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[8];
-};
-
-asmlinkage long
-sys32_sysinfo (struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	long ret, err;
-	int bitcount = 0;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *) &s);
-	set_fs(old_fs);
-	/* Check to see if any memory value is too large for 32-bit and
-	 * scale down if needed.
-	 */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-		while (s.mem_unit < PAGE_SIZE) {
-			s.mem_unit <<= 1;
-			bitcount++;
-		}
-		s.totalram >>= bitcount;
-		s.freeram >>= bitcount;
-		s.sharedram >>= bitcount;
-		s.bufferram >>= bitcount;
-		s.totalswap >>= bitcount;
-		s.freeswap >>= bitcount;
-		s.totalhigh >>= bitcount;
-		s.freehigh >>= bitcount;
-	}
-
-	if (!access_ok(VERIFY_WRITE, info, sizeof(*info)))
-		return -EFAULT;
-
-	err  = __put_user(s.uptime, &info->uptime);
-	err |= __put_user(s.loads[0], &info->loads[0]);
-	err |= __put_user(s.loads[1], &info->loads[1]);
-	err |= __put_user(s.loads[2], &info->loads[2]);
-	err |= __put_user(s.totalram, &info->totalram);
-	err |= __put_user(s.freeram, &info->freeram);
-	err |= __put_user(s.sharedram, &info->sharedram);
-	err |= __put_user(s.bufferram, &info->bufferram);
-	err |= __put_user(s.totalswap, &info->totalswap);
-	err |= __put_user(s.freeswap, &info->freeswap);
-	err |= __put_user(s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long
 sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval)
 {
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 0b8ce59429a..ca7ad78f4de 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -193,50 +193,6 @@ sysn32_waitid(int which, compat_pid_t pid,
 	return ret;
 }
 
-struct sysinfo32 {
-	s32 uptime;
-	u32 loads[3];
-	u32 totalram;
-	u32 freeram;
-	u32 sharedram;
-	u32 bufferram;
-	u32 totalswap;
-	u32 freeswap;
-	u16 procs;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[8];
-};
-
-asmlinkage int sys32_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	int ret, err;
-	mm_segment_t old_fs = get_fs ();
-
-	set_fs (KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *)&s);
-	set_fs (old_fs);
-	err = put_user (s.uptime, &info->uptime);
-	err |= __put_user (s.loads[0], &info->loads[0]);
-	err |= __put_user (s.loads[1], &info->loads[1]);
-	err |= __put_user (s.loads[2], &info->loads[2]);
-	err |= __put_user (s.totalram, &info->totalram);
-	err |= __put_user (s.freeram, &info->freeram);
-	err |= __put_user (s.sharedram, &info->sharedram);
-	err |= __put_user (s.bufferram, &info->bufferram);
-	err |= __put_user (s.totalswap, &info->totalswap);
-	err |= __put_user (s.freeswap, &info->freeswap);
-	err |= __put_user (s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	return ret;
-}
-
 #define RLIM_INFINITY32	0x7fffffff
 #define RESOURCE32(x) ((x > RLIM_INFINITY32) ? RLIM_INFINITY32 : x)
 
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 39add2341aa..ee8802b5975 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -217,7 +217,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys32_gettimeofday
 	PTR	compat_sys_getrlimit		/* 6095 */
 	PTR	compat_sys_getrusage
-	PTR	sys32_sysinfo
+	PTR	compat_sys_sysinfo
 	PTR	compat_sys_times
 	PTR	sys32_ptrace
 	PTR	sys_getuid			/* 6100 */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index c58b8e0105e..c5f590ca99b 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -321,7 +321,7 @@ sys_call_table:
 	PTR	sys_ni_syscall			/* sys_vm86 */
 	PTR	compat_sys_wait4
 	PTR	sys_swapoff			/* 4115 */
-	PTR	sys32_sysinfo
+	PTR	compat_sys_sysinfo
 	PTR	sys32_ipc
 	PTR	sys_fsync
 	PTR	sys32_sigreturn
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 29be4377aca..ce3245f87fd 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -579,70 +579,6 @@ asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *off
 }
 
 
-struct sysinfo32 {
-	s32 uptime;
-	u32 loads[3];
-	u32 totalram;
-	u32 freeram;
-	u32 sharedram;
-	u32 bufferram;
-	u32 totalswap;
-	u32 freeswap;
-	unsigned short procs;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[12];
-};
-
-/* We used to call sys_sysinfo and translate the result.  But sys_sysinfo
- * undoes the good work done elsewhere, and rather than undoing the
- * damage, I decided to just duplicate the code from sys_sysinfo here.
- */
-
-asmlinkage int sys32_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo val;
-	int err;
-	unsigned long seq;
-
-	/* We don't need a memset here because we copy the
-	 * struct to userspace once element at a time.
-	 */
-
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		val.uptime = jiffies / HZ;
-
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
-
-		val.procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
-
-
-	si_meminfo(&val);
-	si_swapinfo(&val);
-	
-	err = put_user (val.uptime, &info->uptime);
-	err |= __put_user (val.loads[0], &info->loads[0]);
-	err |= __put_user (val.loads[1], &info->loads[1]);
-	err |= __put_user (val.loads[2], &info->loads[2]);
-	err |= __put_user (val.totalram, &info->totalram);
-	err |= __put_user (val.freeram, &info->freeram);
-	err |= __put_user (val.sharedram, &info->sharedram);
-	err |= __put_user (val.bufferram, &info->bufferram);
-	err |= __put_user (val.totalswap, &info->totalswap);
-	err |= __put_user (val.freeswap, &info->freeswap);
-	err |= __put_user (val.procs, &info->procs);
-	err |= __put_user (val.totalhigh, &info->totalhigh);
-	err |= __put_user (val.freehigh, &info->freehigh);
-	err |= __put_user (val.mem_unit, &info->mem_unit);
-	return err ? -EFAULT : 0;
-}
-
-
 /* lseek() needs a wrapper because 'offset' can be negative, but the top
  * half of the argument has been zeroed by syscall.S.
  */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 701d66a596e..be8eb9a0d24 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -197,7 +197,7 @@
 	/* struct rusage contains longs... */
 	ENTRY_COMP(wait4)
 	ENTRY_SAME(swapoff)		/* 115 */
-	ENTRY_DIFF(sysinfo)
+	ENTRY_COMP(sysinfo)
 	ENTRY_SAME(shutdown)
 	ENTRY_SAME(fsync)
 	ENTRY_SAME(madvise)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 03a2a2f30d6..673e8d9df7f 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -198,73 +198,6 @@ static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
 		 __put_user(i->tv_usec, &o->tv_usec)));
 }
 
-struct sysinfo32 {
-        s32 uptime;
-        u32 loads[3];
-        u32 totalram;
-        u32 freeram;
-        u32 sharedram;
-        u32 bufferram;
-        u32 totalswap;
-        u32 freeswap;
-        unsigned short procs;
-	unsigned short pad;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[20-2*sizeof(int)-sizeof(int)];
-};
-
-asmlinkage long compat_sys_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	int ret, err;
-	int bitcount=0;
-	mm_segment_t old_fs = get_fs ();
-	
-	/* The __user cast is valid due to set_fs() */
-	set_fs (KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *)&s);
-	set_fs (old_fs);
-
-	/* Check to see if any memory value is too large for 32-bit and
-         * scale down if needed.
-         */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-	    while (s.mem_unit < PAGE_SIZE) {
-		s.mem_unit <<= 1;
-		bitcount++;
-	    }
-	    s.totalram >>=bitcount;
-	    s.freeram >>= bitcount;
-	    s.sharedram >>= bitcount;
-	    s.bufferram >>= bitcount;
-	    s.totalswap >>= bitcount;
-	    s.freeswap >>= bitcount;
-	    s.totalhigh >>= bitcount;
-	    s.freehigh >>= bitcount;
-	}
-
-	err = put_user (s.uptime, &info->uptime);
-	err |= __put_user (s.loads[0], &info->loads[0]);
-	err |= __put_user (s.loads[1], &info->loads[1]);
-	err |= __put_user (s.loads[2], &info->loads[2]);
-	err |= __put_user (s.totalram, &info->totalram);
-	err |= __put_user (s.freeram, &info->freeram);
-	err |= __put_user (s.sharedram, &info->sharedram);
-	err |= __put_user (s.bufferram, &info->bufferram);
-	err |= __put_user (s.totalswap, &info->totalswap);
-	err |= __put_user (s.freeswap, &info->freeswap);
-	err |= __put_user (s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	
-	return ret;
-}
-
 
 
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 666bb6daa14..664c669b185 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -398,51 +398,6 @@ int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
 	return err;
 }
 
-struct sysinfo32 {
-        s32 uptime;
-        u32 loads[3];
-        u32 totalram;
-        u32 freeram;
-        u32 sharedram;
-        u32 bufferram;
-        u32 totalswap;
-        u32 freeswap;
-        unsigned short procs;
-	unsigned short pads;
-	u32 totalhigh;
-	u32 freehigh;
-	unsigned int mem_unit;
-        char _f[8];
-};
-
-asmlinkage long sys32_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	int ret, err;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs (KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __force __user *) &s);
-	set_fs (old_fs);
-	err = put_user (s.uptime, &info->uptime);
-	err |= __put_user (s.loads[0], &info->loads[0]);
-	err |= __put_user (s.loads[1], &info->loads[1]);
-	err |= __put_user (s.loads[2], &info->loads[2]);
-	err |= __put_user (s.totalram, &info->totalram);
-	err |= __put_user (s.freeram, &info->freeram);
-	err |= __put_user (s.sharedram, &info->sharedram);
-	err |= __put_user (s.bufferram, &info->bufferram);
-	err |= __put_user (s.totalswap, &info->totalswap);
-	err |= __put_user (s.freeswap, &info->freeswap);
-	err |= __put_user (s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
 				struct compat_timespec __user *interval)
 {
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 71e54ef0931..97901296894 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -517,10 +517,10 @@ sys32_swapoff_wrapper:
 	llgtr	%r2,%r2			# const char *
 	jg	sys_swapoff		# branch to system call
 
-	.globl	sys32_sysinfo_wrapper
-sys32_sysinfo_wrapper:
+	.globl	compat_sys_sysinfo_wrapper
+compat_sys_sysinfo_wrapper:
 	llgtr	%r2,%r2			# struct sysinfo_emu31 *
-	jg	sys32_sysinfo		# branch to system call
+	jg	compat_sys_sysinfo	# branch to system call
 
 	.globl	sys32_ipc_wrapper
 sys32_ipc_wrapper:
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index a4ceae3dbcf..a52c44455bf 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -124,7 +124,7 @@ NI_SYSCALL							/* old "idle" system call */
 NI_SYSCALL							/* vm86old for i386 */
 SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper)
 SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper)		/* 115 */
-SYSCALL(sys_sysinfo,sys_sysinfo,sys32_sysinfo_wrapper)
+SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
 SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper)
 SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
 SYSCALL(sys_sigreturn_glue,sys_sigreturn_glue,sys32_sigreturn_glue)
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index e27cb71bd8e..7876a022628 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -459,70 +459,6 @@ asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2)
 	return sys_sysfs(option, arg1, arg2);
 }
 
-struct sysinfo32 {
-        s32 uptime;
-        u32 loads[3];
-        u32 totalram;
-        u32 freeram;
-        u32 sharedram;
-        u32 bufferram;
-        u32 totalswap;
-        u32 freeswap;
-        unsigned short procs;
-	unsigned short pad;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[20-2*sizeof(int)-sizeof(int)];
-};
-
-asmlinkage long sys32_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	int ret, err;
-	int bitcount = 0;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs(KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *) &s);
-	set_fs(old_fs);
-	/* Check to see if any memory value is too large for 32-bit and
-         * scale down if needed.
-         */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-		while (s.mem_unit < PAGE_SIZE) {
-			s.mem_unit <<= 1;
-			bitcount++;
-		}
-		s.totalram >>= bitcount;
-		s.freeram >>= bitcount;
-		s.sharedram >>= bitcount;
-		s.bufferram >>= bitcount;
-		s.totalswap >>= bitcount;
-		s.freeswap >>= bitcount;
-		s.totalhigh >>= bitcount;
-		s.freehigh >>= bitcount;
-	}
-
-	err = put_user (s.uptime, &info->uptime);
-	err |= __put_user (s.loads[0], &info->loads[0]);
-	err |= __put_user (s.loads[1], &info->loads[1]);
-	err |= __put_user (s.loads[2], &info->loads[2]);
-	err |= __put_user (s.totalram, &info->totalram);
-	err |= __put_user (s.freeram, &info->freeram);
-	err |= __put_user (s.sharedram, &info->sharedram);
-	err |= __put_user (s.bufferram, &info->bufferram);
-	err |= __put_user (s.totalswap, &info->totalswap);
-	err |= __put_user (s.freeswap, &info->freeswap);
-	err |= __put_user (s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
 {
 	struct timespec t;
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 9a8026797ac..948b7d2d587 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -61,7 +61,7 @@ sys_call_table32:
 	.word sys32_epoll_wait, sys32_ioprio_set, sys_getppid, sys32_sigaction, sys_sgetmask
 /*200*/	.word sys32_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir
 	.word sys32_readahead, sys32_socketcall, sys32_syslog, sys32_lookup_dcookie, sys32_fadvise64
-/*210*/	.word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, sys32_sysinfo
+/*210*/	.word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, compat_sys_sysinfo
 	.word sys32_ipc, sys32_sigreturn, sys_clone, sys32_ioprio_get, compat_sys_adjtimex
 /*220*/	.word sys32_sigprocmask, sys_ni_syscall, sys32_delete_module, sys_ni_syscall, sys32_getpgid
 	.word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys32_setfsuid16, sys32_setfsgid16
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index b4aa875e175..5f32cf4de5f 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -515,7 +515,7 @@ ia32_sys_call_table:
 	.quad sys32_vm86_warning	/* vm86old */ 
 	.quad compat_sys_wait4
 	.quad sys_swapoff		/* 115 */
-	.quad sys32_sysinfo
+	.quad compat_sys_sysinfo
 	.quad sys32_ipc
 	.quad sys_fsync
 	.quad stub32_sigreturn
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index c9bac3af29d..200fdde18d9 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -523,72 +523,6 @@ sys32_sysfs(int option, u32 arg1, u32 arg2)
 	return sys_sysfs(option, arg1, arg2);
 }
 
-struct sysinfo32 {
-        s32 uptime;
-        u32 loads[3];
-        u32 totalram;
-        u32 freeram;
-        u32 sharedram;
-        u32 bufferram;
-        u32 totalswap;
-        u32 freeswap;
-        unsigned short procs;
-	unsigned short pad; 
-        u32 totalhigh;
-        u32 freehigh;
-        u32 mem_unit;
-        char _f[20-2*sizeof(u32)-sizeof(int)];
-};
-
-asmlinkage long
-sys32_sysinfo(struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-	int bitcount = 0;
-	
-	set_fs (KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *)&s);
-	set_fs (old_fs);
-
-        /* Check to see if any memory value is too large for 32-bit and scale
-	 *  down if needed
-	 */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-		while (s.mem_unit < PAGE_SIZE) {
-			s.mem_unit <<= 1;
-			bitcount++;
-		}
-		s.totalram >>= bitcount;
-		s.freeram >>= bitcount;
-		s.sharedram >>= bitcount;
-		s.bufferram >>= bitcount;
-		s.totalswap >>= bitcount;
-		s.freeswap >>= bitcount;
-		s.totalhigh >>= bitcount;
-		s.freehigh >>= bitcount;
-	}
-
-	if (!access_ok(VERIFY_WRITE, info, sizeof(struct sysinfo32)) ||
-	    __put_user (s.uptime, &info->uptime) ||
-	    __put_user (s.loads[0], &info->loads[0]) ||
-	    __put_user (s.loads[1], &info->loads[1]) ||
-	    __put_user (s.loads[2], &info->loads[2]) ||
-	    __put_user (s.totalram, &info->totalram) ||
-	    __put_user (s.freeram, &info->freeram) ||
-	    __put_user (s.sharedram, &info->sharedram) ||
-	    __put_user (s.bufferram, &info->bufferram) ||
-	    __put_user (s.totalswap, &info->totalswap) ||
-	    __put_user (s.freeswap, &info->freeswap) ||
-	    __put_user (s.procs, &info->procs) ||
-	    __put_user (s.totalhigh, &info->totalhigh) || 
-	    __put_user (s.freehigh, &info->freehigh) ||
-	    __put_user (s.mem_unit, &info->mem_unit))
-		return -EFAULT;
-	return 0;
-}
-                
 asmlinkage long
 sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
 {
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e1a429ada97..7e861303cbd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -313,6 +313,9 @@ static inline int __attribute__ ((format (printf, 1, 2))) pr_debug(const char *
 	(void)__tmp; \
 })
 
+struct sysinfo;
+extern int do_sysinfo(struct sysinfo *info);
+
 #endif /* __KERNEL__ */
 
 #define SI_LOAD_SHIFT	16
diff --git a/kernel/compat.c b/kernel/compat.c
index 6952dd05730..cebb4c28c03 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1016,3 +1016,69 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
 	return sys_migrate_pages(pid, nr_bits + 1, old, new);
 }
 #endif
+
+struct compat_sysinfo {
+	s32 uptime;
+	u32 loads[3];
+	u32 totalram;
+	u32 freeram;
+	u32 sharedram;
+	u32 bufferram;
+	u32 totalswap;
+	u32 freeswap;
+	u16 procs;
+	u16 pad;
+	u32 totalhigh;
+	u32 freehigh;
+	u32 mem_unit;
+	char _f[20-2*sizeof(u32)-sizeof(int)];
+};
+
+asmlinkage long
+compat_sys_sysinfo(struct compat_sysinfo __user *info)
+{
+	struct sysinfo s;
+
+	do_sysinfo(&s);
+
+	/* Check to see if any memory value is too large for 32-bit and scale
+	 *  down if needed
+	 */
+	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
+		int bitcount = 0;
+
+		while (s.mem_unit < PAGE_SIZE) {
+			s.mem_unit <<= 1;
+			bitcount++;
+		}
+
+		s.totalram >>= bitcount;
+		s.freeram >>= bitcount;
+		s.sharedram >>= bitcount;
+		s.bufferram >>= bitcount;
+		s.totalswap >>= bitcount;
+		s.freeswap >>= bitcount;
+		s.totalhigh >>= bitcount;
+		s.freehigh >>= bitcount;
+	}
+
+	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
+	    __put_user (s.uptime, &info->uptime) ||
+	    __put_user (s.loads[0], &info->loads[0]) ||
+	    __put_user (s.loads[1], &info->loads[1]) ||
+	    __put_user (s.loads[2], &info->loads[2]) ||
+	    __put_user (s.totalram, &info->totalram) ||
+	    __put_user (s.freeram, &info->freeram) ||
+	    __put_user (s.sharedram, &info->sharedram) ||
+	    __put_user (s.bufferram, &info->bufferram) ||
+	    __put_user (s.totalswap, &info->totalswap) ||
+	    __put_user (s.freeswap, &info->freeswap) ||
+	    __put_user (s.procs, &info->procs) ||
+	    __put_user (s.totalhigh, &info->totalhigh) ||
+	    __put_user (s.freehigh, &info->freehigh) ||
+	    __put_user (s.mem_unit, &info->mem_unit))
+		return -EFAULT;
+
+	return 0;
+}
+
diff --git a/kernel/timer.c b/kernel/timer.c
index 31ab627df8a..8533c379608 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1392,17 +1392,16 @@ asmlinkage long sys_gettid(void)
 }
 
 /**
- * sys_sysinfo - fill in sysinfo struct
+ * do_sysinfo - fill in sysinfo struct
  * @info: pointer to buffer to fill
  */ 
-asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+int do_sysinfo(struct sysinfo *info)
 {
-	struct sysinfo val;
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
 
-	memset((char *)&val, 0, sizeof(struct sysinfo));
+	memset(info, 0, sizeof(struct sysinfo));
 
 	do {
 		struct timespec tp;
@@ -1422,17 +1421,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
-		val.procs = nr_threads;
+		info->procs = nr_threads;
 	} while (read_seqretry(&xtime_lock, seq));
 
-	si_meminfo(&val);
-	si_swapinfo(&val);
+	si_meminfo(info);
+	si_swapinfo(info);
 
 	/*
 	 * If the sum of all the available memory (i.e. ram + swap)
@@ -1443,11 +1442,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 	 *  -Erik Andersen <andersee@debian.org>
 	 */
 
-	mem_total = val.totalram + val.totalswap;
-	if (mem_total < val.totalram || mem_total < val.totalswap)
+	mem_total = info->totalram + info->totalswap;
+	if (mem_total < info->totalram || mem_total < info->totalswap)
 		goto out;
 	bitcount = 0;
-	mem_unit = val.mem_unit;
+	mem_unit = info->mem_unit;
 	while (mem_unit > 1) {
 		bitcount++;
 		mem_unit >>= 1;
@@ -1459,22 +1458,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 
 	/*
 	 * If mem_total did not overflow, multiply all memory values by
-	 * val.mem_unit and set it to 1.  This leaves things compatible
+	 * info->mem_unit and set it to 1.  This leaves things compatible
 	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
 	 * kernels...
 	 */
 
-	val.mem_unit = 1;
-	val.totalram <<= bitcount;
-	val.freeram <<= bitcount;
-	val.sharedram <<= bitcount;
-	val.bufferram <<= bitcount;
-	val.totalswap <<= bitcount;
-	val.freeswap <<= bitcount;
-	val.totalhigh <<= bitcount;
-	val.freehigh <<= bitcount;
+	info->mem_unit = 1;
+	info->totalram <<= bitcount;
+	info->freeram <<= bitcount;
+	info->sharedram <<= bitcount;
+	info->bufferram <<= bitcount;
+	info->totalswap <<= bitcount;
+	info->freeswap <<= bitcount;
+	info->totalhigh <<= bitcount;
+	info->freehigh <<= bitcount;
+
+out:
+	return 0;
+}
+
+asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+{
+	struct sysinfo val;
+
+	do_sysinfo(&val);
 
- out:
 	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 11f57cedcf382574a1e41d6cec2349f287fcea67 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 10 Feb 2007 01:46:09 -0800
Subject: [PATCH] audit: fix audit_filter_user_rules() initialization bug

gcc emits this warning:

 kernel/auditfilter.c: In function 'audit_filter_user':
 kernel/auditfilter.c:1611: warning: 'state' is used uninitialized in this function

I tend to agree with gcc - there are a couple of plausible exit paths from
audit_filter_user_rules() where it does not set 'state', keeping the
variable uninitialized.  For example if a filter rule has an AUDIT_POSSIBLE
action.  Initialize to 'wont audit'.  Fix whitespace damage too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/auditfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9c8c23227c7..87865f8b4ce 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1601,8 +1601,8 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 
 int audit_filter_user(struct netlink_skb_parms *cb, int type)
 {
+	enum audit_state state = AUDIT_DISABLED;
 	struct audit_entry *e;
-	enum audit_state   state;
 	int ret = 1;
 
 	rcu_read_lock();
-- 
cgit v1.2.3-70-g09d2


From e3e8a75d2acfc61ebf25524666a0a2c6abb0620c Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Sat, 10 Feb 2007 01:46:19 -0800
Subject: [PATCH] Extract and use wake_up_klogd()

Remove hack with printing space to wake up klogd.  Use explicit
wake_up_klogd().

See earlier discussion
http://groups.google.com/group/fa.linux.kernel/browse_frm/thread/75f496668409f58d/1a8f28983a51e1ff?lnk=st&q=wake_up_klogd+group%3Afa.linux.kernel&rnum=2#1a8f28983a51e1ff

Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |  1 +
 kernel/printk.c        | 10 ++++++++--
 lib/bust_spinlocks.c   | 10 +---------
 3 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7e861303cbd..e91dce75bbc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -176,6 +176,7 @@ static inline void console_verbose(void)
 }
 
 extern void bust_spinlocks(int yes);
+extern void wake_up_klogd(void);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern int panic_timeout;
 extern int panic_on_oops;
diff --git a/kernel/printk.c b/kernel/printk.c
index 3e79e18dce3..4da26b06797 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -783,6 +783,12 @@ int is_console_locked(void)
 	return console_locked;
 }
 
+void wake_up_klogd(void)
+{
+	if (!oops_in_progress && waitqueue_active(&log_wait))
+		wake_up_interruptible(&log_wait);
+}
+
 /**
  * release_console_sem - unlock the console system
  *
@@ -825,8 +831,8 @@ void release_console_sem(void)
 	console_locked = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-		wake_up_interruptible(&log_wait);
+	if (wake_klogd)
+		wake_up_klogd();
 }
 EXPORT_SYMBOL(release_console_sem);
 
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 0ee968e8e02..accb3565816 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -19,19 +19,11 @@ void __attribute__((weak)) bust_spinlocks(int yes)
 	if (yes) {
 		oops_in_progress = 1;
 	} else {
-		int loglevel_save = console_loglevel;
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
 		oops_in_progress = 0;
-		/*
-		 * OK, the message is on the console.  Now we call printk()
-		 * without oops_in_progress set so that printk() will give klogd
-		 * and the blanked console a poke.  Hold onto your hats...
-		 */
-		console_loglevel = 15;		/* NMI oopser may have shut the console up */
-		printk(" ");
-		console_loglevel = loglevel_save;
+		wake_up_klogd();
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1efc5da3cf567d2f6b795f9d2112ed97fec4ee7c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Date: Sat, 10 Feb 2007 01:46:29 -0800
Subject: [PATCH] order of lockdep off/on in vprintk() should be changed

The order of locking between lockdep_off/on() and local_irq_save/restore() in
vprintk() should be changed.

* In kernel/printk.c :

vprintk() does :

preempt_disable()
local_irq_save()
lockdep_off()
spin_lock(&logbuf_lock)
spin_unlock(&logbuf_lock)
if(!down_trylock(&console_sem))
   up(&console_sem)
lockdep_on()
local_irq_restore()
preempt_enable()

The goal here is to make sure we do not call printk() recursively from
kernel/lockdep.c:__lock_acquire() (called from spin_* and down/up) nor from
kernel/lockdep.c:trace_hardirqs_on/off() (called from local_irq_restore/save).
It can then potentially call printk() through mark_held_locks/mark_lock.

It correctly protects against the spin_lock call and the up/down call, but it
does not protect against local_irq_restore. It could cause infinite recursive
printk/trace_hardirqs_on() calls when printk() is called from the
mark_lock() error handling path.

We should change the locking so it becomes correct :

preempt_disable()
lockdep_off()
local_irq_save()
spin_lock(&logbuf_lock)
spin_unlock(&logbuf_lock)
if(!down_trylock(&console_sem))
   up(&console_sem)
local_irq_restore()
lockdep_on()
preempt_enable()

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 4da26b06797..0c151877ff7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -529,7 +529,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		zap_locks();
 
 	/* This stops the holder of console_sem just where we want him */
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	lockdep_off();
 	spin_lock(&logbuf_lock);
 	printk_cpu = smp_processor_id();
@@ -618,7 +618,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 			up(&console_sem);
 		}
 		lockdep_on();
-		local_irq_restore(flags);
+		raw_local_irq_restore(flags);
 	} else {
 		/*
 		 * Someone else owns the drivers.  We drop the spinlock, which
@@ -628,7 +628,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		printk_cpu = UINT_MAX;
 		spin_unlock(&logbuf_lock);
 		lockdep_on();
-		local_irq_restore(flags);
+		raw_local_irq_restore(flags);
 	}
 
 	preempt_enable();
-- 
cgit v1.2.3-70-g09d2


From 501b9ebf43f9973c3e246c8fbd17144d81a989ef Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Sat, 10 Feb 2007 01:46:34 -0800
Subject: [PATCH] Fix apparent typo CONFIG_LOCKDEP_DEBUG

Replace the apparent typo CONFIG_LOCKDEP_DEBUG with the correct
CONFIG_DEBUG_LOCKDEP.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/lockdep_proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 57a547a2da3..88fc611b3ae 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -244,7 +244,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
 
 		sum_forward_deps += count_forward_deps(class);
 	}
-#ifdef CONFIG_LOCKDEP_DEBUG
+#ifdef CONFIG_DEBUG_LOCKDEP
 	DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
 #endif
 	seq_printf(m, " lock-classes:                  %11lu [max: %lu]\n",
-- 
cgit v1.2.3-70-g09d2


From 8d06087714b78e8921bd30b5c64202fe80c47339 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sat, 10 Feb 2007 01:46:38 -0800
Subject: [PATCH] _proc_do_string(): fix short reads

If you try to read things like /proc/sys/kernel/osrelease with single-byte
reads, you get just one byte and then EOF.  This is because _proc_do_string()
assumes that the caller is read()ing into a buffer which is large enough to
fit the whole string in a single hit.

Fix.

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 84cab0ce44d..e0ac6cd79fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1686,13 +1686,12 @@ static int _proc_do_string(void* data, int maxlen, int write,
 	size_t len;
 	char __user *p;
 	char c;
-	
-	if (!data || !maxlen || !*lenp ||
-	    (*ppos && !write)) {
+
+	if (!data || !maxlen || !*lenp) {
 		*lenp = 0;
 		return 0;
 	}
-	
+
 	if (write) {
 		len = 0;
 		p = buffer;
@@ -1713,6 +1712,15 @@ static int _proc_do_string(void* data, int maxlen, int write,
 		len = strlen(data);
 		if (len > maxlen)
 			len = maxlen;
+
+		if (*ppos > len) {
+			*lenp = 0;
+			return 0;
+		}
+
+		data += *ppos;
+		len  -= *ppos;
+
 		if (len > *lenp)
 			len = *lenp;
 		if (len)
-- 
cgit v1.2.3-70-g09d2


From 4b98d11b40f03382918796f3c5c936d5495d20a4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 10 Feb 2007 01:46:45 -0800
Subject: [PATCH] ifdef ->rchar, ->wchar, ->syscr, ->syscw from task_struct

They are fat: 4x8 bytes in task_struct.
They are unconditionally updated in every fork, read, write and sendfile.
They are used only if you have some "extended acct fields feature".

And please, please, please, read(2) knows about bytes, not characters,
why it is called "rchar"?

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c        |  4 ++++
 fs/read_write.c       | 24 ++++++++++++------------
 include/linux/sched.h | 40 ++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c         |  2 ++
 4 files changed, 58 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea3b37..7fb37d6f286 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1810,17 +1810,21 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filld
 static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
 {
 	return sprintf(buffer,
+#ifdef CONFIG_TASK_XACCT
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
 			"syscw: %llu\n"
+#endif
 			"read_bytes: %llu\n"
 			"write_bytes: %llu\n"
 			"cancelled_write_bytes: %llu\n",
+#ifdef CONFIG_TASK_XACCT
 			(unsigned long long)task->rchar,
 			(unsigned long long)task->wchar,
 			(unsigned long long)task->syscr,
 			(unsigned long long)task->syscw,
+#endif
 			(unsigned long long)task->ioac.read_bytes,
 			(unsigned long long)task->ioac.write_bytes,
 			(unsigned long long)task->ioac.cancelled_write_bytes);
diff --git a/fs/read_write.c b/fs/read_write.c
index 707ac21700d..bcb0ef2aae3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -274,9 +274,9 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 				ret = do_sync_read(file, buf, count, pos);
 			if (ret > 0) {
 				fsnotify_access(file->f_path.dentry);
-				current->rchar += ret;
+				add_rchar(current, ret);
 			}
-			current->syscr++;
+			inc_syscr(current);
 		}
 	}
 
@@ -332,9 +332,9 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 				ret = do_sync_write(file, buf, count, pos);
 			if (ret > 0) {
 				fsnotify_modify(file->f_path.dentry);
-				current->wchar += ret;
+				add_wchar(current, ret);
 			}
-			current->syscw++;
+			inc_syscw(current);
 		}
 	}
 
@@ -675,8 +675,8 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
 	}
 
 	if (ret > 0)
-		current->rchar += ret;
-	current->syscr++;
+		add_rchar(current, ret);
+	inc_syscr(current);
 	return ret;
 }
 
@@ -696,8 +696,8 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
 	}
 
 	if (ret > 0)
-		current->wchar += ret;
-	current->syscw++;
+		add_wchar(current, ret);
+	inc_syscw(current);
 	return ret;
 }
 
@@ -779,12 +779,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
 
 	if (retval > 0) {
-		current->rchar += retval;
-		current->wchar += retval;
+		add_rchar(current, retval);
+		add_wchar(current, retval);
 	}
-	current->syscr++;
-	current->syscw++;
 
+	inc_syscr(current);
+	inc_syscw(current);
 	if (*ppos > max)
 		retval = -EOVERFLOW;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 44637353519..76c8e2dc48d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1013,8 +1013,10 @@ struct task_struct {
  * to a stack based synchronous wait) if its doing sync IO.
  */
 	wait_queue_t *io_wait;
+#ifdef CONFIG_TASK_XACCT
 /* i/o counters(bytes read/written, #syscalls */
 	u64 rchar, wchar, syscr, syscw;
+#endif
 	struct task_io_accounting ioac;
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
@@ -1649,6 +1651,44 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_TASK_XACCT
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+	tsk->rchar += amt;
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+	tsk->wchar += amt;
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+	tsk->syscr++;
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+	tsk->syscw++;
+}
+#else
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+}
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index d57118da73f..80284eb488c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1038,10 +1038,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
  	p->sched_time = 0;
+#ifdef CONFIG_TASK_XACCT
 	p->rchar = 0;		/* I/O counter: bytes read */
 	p->wchar = 0;		/* I/O counter: bytes written */
 	p->syscr = 0;		/* I/O counter: read syscalls */
 	p->syscw = 0;		/* I/O counter: write syscalls */
+#endif
 	task_io_accounting_init(p);
 	acct_clear_integrals(p);
 
-- 
cgit v1.2.3-70-g09d2


From 5ea8176994003483a18c8fed580901e2125f8a83 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sun, 11 Feb 2007 15:41:31 +0000
Subject: [PATCH] sort the devres mess out

* Split the implementation-agnostic stuff in separate files.
* Make sure that targets using non-default request_irq() pull
  kernel/irq/devres.o
* Introduce new symbols (HAS_IOPORT and HAS_IOMEM) defaulting to positive;
  allow architectures to turn them off (we needed these symbols anyway for
  dependencies of quite a few drivers).
* protect the ioport-related parts of lib/devres.o with CONFIG_HAS_IOPORT.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig           |   5 +
 arch/cris/Kconfig          |   3 +
 arch/h8300/Kconfig         |   3 +
 arch/h8300/kernel/Makefile |   4 +-
 arch/m32r/Kconfig          |   3 +
 arch/m68k/Kconfig          |   3 +
 arch/m68k/kernel/Makefile  |   4 +-
 arch/m68knommu/Kconfig     |   3 +
 arch/s390/Kconfig          |   3 +
 arch/sparc/kernel/Makefile |   4 +-
 arch/um/Kconfig            |   3 +
 arch/xtensa/Kconfig        |   3 +
 include/linux/io.h         |   6 -
 include/linux/pci.h        |   5 +
 kernel/irq/Makefile        |   2 +-
 kernel/irq/devres.c        |  88 +++++++++++++
 kernel/irq/manage.c        |  86 -------------
 lib/Kconfig                |   9 +-
 lib/Makefile               |   6 +-
 lib/devres.c               | 300 +++++++++++++++++++++++++++++++++++++++++++++
 lib/iomap.c                | 296 --------------------------------------------
 21 files changed, 442 insertions(+), 397 deletions(-)
 create mode 100644 kernel/irq/devres.c
 create mode 100644 lib/devres.c

(limited to 'kernel')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fbf4b2a62b6..5c795193ebb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -29,6 +29,10 @@ config MMU
 	bool
 	default y
 
+config NO_IOPORT
+	bool
+	default n
+
 config EISA
 	bool
 	---help---
@@ -298,6 +302,7 @@ config ARCH_RPC
 	select TIMER_ACORN
 	select ARCH_MAY_HAVE_PC_FDC
 	select ISA_DMA_API
+	select NO_IOPORT
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
 	  CD-ROM interface, serial and parallel port, and the floppy drive.
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index e3db1427dbe..4b41248b61a 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -44,6 +44,9 @@ config IRQ_PER_CPU
 	bool
 	default y
 
+config NO_IOPORT
+	def_bool y
+
 config CRIS
 	bool
 	default y
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 146eb28f622..1734d96422c 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -57,6 +57,9 @@ config TIME_LOW_RES
 	bool
 	default y
 
+config NO_IOPORT
+	def_bool y
+
 config ISA
 	bool
 	default y
diff --git a/arch/h8300/kernel/Makefile b/arch/h8300/kernel/Makefile
index 71b6131e98b..4edbc2ef6ca 100644
--- a/arch/h8300/kernel/Makefile
+++ b/arch/h8300/kernel/Makefile
@@ -6,6 +6,8 @@ extra-y := vmlinux.lds
 
 obj-y := process.o traps.o ptrace.o ints.o \
 	 sys_h8300.o time.o semaphore.o signal.o \
-         setup.o gpio.o init_task.o syscalls.o
+         setup.o gpio.o init_task.o syscalls.o devres.o
+
+devres-y = ../../../kernel/irq/devres.o
 
 obj-$(CONFIG_MODULES) += module.o h8300_ksyms.o 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 565d0138078..9740d6b8ae1 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -28,6 +28,9 @@ config GENERIC_IRQ_PROBE
 	bool
 	default y
 
+config NO_IOPORT
+	def_bool y
+
 source "init/Kconfig"
 
 
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 0bffbe6e7e1..a8e1e604dfa 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -42,6 +42,9 @@ config ARCH_MAY_HAVE_PC_FDC
 	depends on Q40 || (BROKEN && SUN3X)
 	default y
 
+config NO_IOPORT
+	def_bool y
+
 mainmenu "Linux/68k Kernel Configuration"
 
 source "init/Kconfig"
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 1c9ecaa473d..0b68ab8d63d 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -10,7 +10,9 @@ endif
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o \
-	   sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o
+	   sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
+
+devres-y = ../../../kernel/irq/devres.o
 
 obj-$(CONFIG_PCI)	+= bios32.o
 obj-$(CONFIG_MODULES)	+= module.o
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index c5fc5406dad..823f73736bb 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -53,6 +53,9 @@ config TIME_LOW_RES
 	bool
 	default y
 
+config NO_IOPORT
+	def_bool y
+
 source "init/Kconfig"
 
 menu "Processor type and features"
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c6497300426..0c83d26ef09 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -41,6 +41,9 @@ config GENERIC_HWEIGHT
 config GENERIC_TIME
 	def_bool y
 
+config NO_IOPORT
+	def_bool y
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 6616ee05c31..e795f282dec 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -12,7 +12,9 @@ obj-y    := entry.o wof.o wuf.o etrap.o rtrap.o traps.o $(IRQ_OBJS) \
 	    sys_sparc.o sunos_asm.o systbls.o \
 	    time.o windows.o cpu.o devices.o sclow.o \
 	    tadpole.o tick14.o ptrace.o sys_solaris.o \
-	    unaligned.o muldiv.o semaphore.o prom.o of_device.o
+	    unaligned.o muldiv.o semaphore.o prom.o of_device.o devres.o
+
+devres-y = ../../../kernel/irq/devres.o
 
 obj-$(CONFIG_PCI) += pcic.o
 obj-$(CONFIG_SUN4) += sun4setup.o
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index d32a80e6668..b3a21ba77cd 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -16,6 +16,9 @@ config MMU
 	bool
 	default y
 
+config NO_IOMEM
+	def_bool y
+
 mainmenu "Linux/Usermode Kernel Configuration"
 
 config ISA
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 7c99d518e49..7fbb44bea37 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,6 +46,9 @@ config ARCH_HAS_ILOG2_U64
 	bool
 	default n
 
+config NO_IOPORT
+	def_bool y
+
 source "init/Kconfig"
 
 menu "Processor type and features"
diff --git a/include/linux/io.h b/include/linux/io.h
index 9e419ebfc98..c244a0cc931 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -43,12 +43,6 @@ void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset,
 				    unsigned long size);
 void devm_iounmap(struct device *dev, void __iomem *addr);
 
-void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
-void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
-void __iomem * const * pcim_iomap_table(struct pci_dev *pdev);
-
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name);
-
 /**
  *	check_signature		-	find BIOS signatures
  *	@io_addr: mmio address to check
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e3042e7e1c..98c8765a488 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -840,6 +840,11 @@ enum pci_fixup_pass {
 
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
 
+void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
+void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
+void __iomem * const * pcim_iomap_table(struct pci_dev *pdev);
+int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name);
+
 extern int pci_pci_problems;
 #define PCIPCI_FAIL		1	/* No PCI PCI DMA */
 #define PCIPCI_TRITON		2
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 1dab0ac3f79..681c52dbfe2 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,5 @@
 
-obj-y := handle.o manage.o spurious.o resend.o chip.o
+obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
new file mode 100644
index 00000000000..85a430da0fb
--- /dev/null
+++ b/kernel/irq/devres.c
@@ -0,0 +1,88 @@
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+/*
+ * Device resource management aware IRQ request/free implementation.
+ */
+struct irq_devres {
+	unsigned int irq;
+	void *dev_id;
+};
+
+static void devm_irq_release(struct device *dev, void *res)
+{
+	struct irq_devres *this = res;
+
+	free_irq(this->irq, this->dev_id);
+}
+
+static int devm_irq_match(struct device *dev, void *res, void *data)
+{
+	struct irq_devres *this = res, *match = data;
+
+	return this->irq == match->irq && this->dev_id == match->dev_id;
+}
+
+/**
+ *	devm_request_irq - allocate an interrupt line for a managed device
+ *	@dev: device to request interrupt for
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	Except for the extra @dev argument, this function takes the same
+ *	arguments and performs the same function as request_irq().  IRQs
+ *	requested with this function are automatically freed on driver detach.
+ *
+ *	If an IRQ allocated with this function needs to be freed separately,
+ *	devm_free_irq() must be used.  Returns 0 on success, -ENOMEM or the
+ *	request_irq() error otherwise.
+ */
+int devm_request_irq(struct device *dev, unsigned int irq,
+		     irq_handler_t handler, unsigned long irqflags,
+		     const char *devname, void *dev_id)
+{
+	struct irq_devres *dr;
+	int rc;
+
+	dr = devres_alloc(devm_irq_release, sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	rc = request_irq(irq, handler, irqflags, devname, dev_id);
+	if (rc) {
+		/* devres_alloc()ed memory must not be kfree()d directly */
+		devres_free(dr);
+		return rc;
+	}
+
+	dr->irq = irq;
+	dr->dev_id = dev_id;
+	devres_add(dev, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_request_irq);
+
+/**
+ *	devm_free_irq - free an interrupt
+ *	@dev: device to free interrupt for
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Except for the extra @dev argument, this function takes the
+ *	same arguments and performs the same function as free_irq().
+ *	This function instead of free_irq() should be used to manually
+ *	free IRQs allocated with devm_request_irq().
+ */
+void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
+{
+	struct irq_devres match_data = { irq, dev_id };
+
+	free_irq(irq, dev_id);
+	WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
+			       &match_data));
+}
+EXPORT_SYMBOL(devm_free_irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c4b7ed1cebf..8b961adc3bd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -482,89 +482,3 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
-
-/*
- * Device resource management aware IRQ request/free implementation.
- */
-struct irq_devres {
-	unsigned int irq;
-	void *dev_id;
-};
-
-static void devm_irq_release(struct device *dev, void *res)
-{
-	struct irq_devres *this = res;
-
-	free_irq(this->irq, this->dev_id);
-}
-
-static int devm_irq_match(struct device *dev, void *res, void *data)
-{
-	struct irq_devres *this = res, *match = data;
-
-	return this->irq == match->irq && this->dev_id == match->dev_id;
-}
-
-/**
- *	devm_request_irq - allocate an interrupt line for a managed device
- *	@dev: device to request interrupt for
- *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
- *	@irqflags: Interrupt type flags
- *	@devname: An ascii name for the claiming device
- *	@dev_id: A cookie passed back to the handler function
- *
- *	Except for the extra @dev argument, this function takes the
- *	same arguments and performs the same function as
- *	request_irq().  IRQs requested with this function will be
- *	automatically freed on driver detach.
- *
- *	If an IRQ allocated with this function needs to be freed
- *	separately, dev_free_irq() must be used.
- */
-int devm_request_irq(struct device *dev, unsigned int irq,
-		     irq_handler_t handler, unsigned long irqflags,
-		     const char *devname, void *dev_id)
-{
-	struct irq_devres *dr;
-	int rc;
-
-	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
-			  GFP_KERNEL);
-	if (!dr)
-		return -ENOMEM;
-
-	rc = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (rc) {
-		kfree(dr);
-		return rc;
-	}
-
-	dr->irq = irq;
-	dr->dev_id = dev_id;
-	devres_add(dev, dr);
-
-	return 0;
-}
-EXPORT_SYMBOL(devm_request_irq);
-
-/**
- *	devm_free_irq - free an interrupt
- *	@dev: device to free interrupt for
- *	@irq: Interrupt line to free
- *	@dev_id: Device identity to free
- *
- *	Except for the extra @dev argument, this function takes the
- *	same arguments and performs the same function as free_irq().
- *	This function instead of free_irq() should be used to manually
- *	free IRQs allocated with dev_request_irq().
- */
-void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
-{
-	struct irq_devres match_data = { irq, dev_id };
-
-	free_irq(irq, dev_id);
-	WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
-			       &match_data));
-}
-EXPORT_SYMBOL(devm_free_irq);
diff --git a/lib/Kconfig b/lib/Kconfig
index 9b03581cdec..38424991504 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -101,9 +101,14 @@ config TEXTSEARCH_FSM
 config PLIST
 	boolean
 
-config IOMAP_COPY
+config HAS_IOMEM
 	boolean
-	depends on !UML
+	depends on !NO_IOMEM
+	default y
+
+config HAS_IOPORT
+	boolean
+	depends on HAS_IOMEM && !NO_IOPORT
 	default y
 
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index b819e37440d..992a39ef9ff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,15 +12,15 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o kobject_uevent.o klist.o
 
-obj-y += sort.o parser.o halfmd4.o debug_locks.o random32.o iomap.o \
-	bust_spinlocks.o
+obj-y += sort.o parser.o halfmd4.o debug_locks.o random32.o bust_spinlocks.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
 CFLAGS_kobject_uevent.o += -DDEBUG
 endif
 
-obj-$(CONFIG_IOMAP_COPY) += iomap_copy.o
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
diff --git a/lib/devres.c b/lib/devres.c
new file mode 100644
index 00000000000..2a668dd7cac
--- /dev/null
+++ b/lib/devres.c
@@ -0,0 +1,300 @@
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+static void devm_ioremap_release(struct device *dev, void *res)
+{
+	iounmap(*(void __iomem **)res);
+}
+
+static int devm_ioremap_match(struct device *dev, void *res, void *match_data)
+{
+	return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioremap - Managed ioremap()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap().  Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap(struct device *dev, unsigned long offset,
+			   unsigned long size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap);
+
+/**
+ * devm_ioremap_nocache - Managed ioremap_nocache()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_nocache().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_nocache(struct device *dev, unsigned long offset,
+				   unsigned long size)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_nocache(offset, size);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_nocache);
+
+/**
+ * devm_iounmap - Managed iounmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed iounmap().  @addr must have been mapped using devm_ioremap*().
+ */
+void devm_iounmap(struct device *dev, void __iomem *addr)
+{
+	iounmap(addr);
+	WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match,
+			       (void *)addr));
+}
+EXPORT_SYMBOL(devm_iounmap);
+
+#ifdef CONFIG_HAS_IOPORT
+/*
+ * Generic iomap devres
+ */
+static void devm_ioport_map_release(struct device *dev, void *res)
+{
+	ioport_unmap(*(void __iomem **)res);
+}
+
+static int devm_ioport_map_match(struct device *dev, void *res,
+				 void *match_data)
+{
+	return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioport_map - Managed ioport_map()
+ * @dev: Generic device to map ioport for
+ * @port: Port to map
+ * @nr: Number of ports to map
+ *
+ * Managed ioport_map().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
+			       unsigned int nr)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioport_map(port, nr);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioport_map);
+
+/**
+ * devm_ioport_unmap - Managed ioport_unmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed ioport_unmap().  @addr must have been mapped using
+ * devm_ioport_map().
+ */
+void devm_ioport_unmap(struct device *dev, void __iomem *addr)
+{
+	ioport_unmap(addr);
+	WARN_ON(devres_destroy(dev, devm_ioport_map_release,
+			       devm_ioport_map_match, (void *)addr));
+}
+EXPORT_SYMBOL(devm_ioport_unmap);
+
+#ifdef CONFIG_PCI
+/*
+ * PCI iomap devres
+ */
+#define PCIM_IOMAP_MAX	PCI_ROM_RESOURCE
+
+struct pcim_iomap_devres {
+	void __iomem *table[PCIM_IOMAP_MAX];
+};
+
+static void pcim_iomap_release(struct device *gendev, void *res)
+{
+	struct pci_dev *dev = container_of(gendev, struct pci_dev, dev);
+	struct pcim_iomap_devres *this = res;
+	int i;
+
+	for (i = 0; i < PCIM_IOMAP_MAX; i++)
+		if (this->table[i])
+			pci_iounmap(dev, this->table[i]);
+}
+
+/**
+ * pcim_iomap_table - access iomap allocation table
+ * @pdev: PCI device to access iomap table for
+ *
+ * Access iomap allocation table for @pdev.  If iomap table doesn't
+ * exist and @pdev is managed, it will be allocated.  All iomaps
+ * recorded in the iomap table are automatically unmapped on driver
+ * detach.
+ *
+ * This function might sleep when the table is first allocated but can
+ * be safely called without context and is guaranteed to succeed once
+ * allocated.
+ */
+void __iomem * const * pcim_iomap_table(struct pci_dev *pdev)
+{
+	struct pcim_iomap_devres *dr, *new_dr;
+
+	dr = devres_find(&pdev->dev, pcim_iomap_release, NULL, NULL);
+	if (dr)
+		return dr->table;
+
+	new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL);
+	if (!new_dr)
+		return NULL;
+	dr = devres_get(&pdev->dev, new_dr, NULL, NULL);
+	return dr->table;
+}
+EXPORT_SYMBOL(pcim_iomap_table);
+
+/**
+ * pcim_iomap - Managed pci_iomap()
+ * @pdev: PCI device to iomap for
+ * @bar: BAR to iomap
+ * @maxlen: Maximum length of iomap
+ *
+ * Managed pci_iomap().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+{
+	void __iomem **tbl;
+
+	BUG_ON(bar >= PCIM_IOMAP_MAX);
+
+	tbl = (void __iomem **)pcim_iomap_table(pdev);
+	if (!tbl || tbl[bar])	/* duplicate mappings not allowed */
+		return NULL;
+
+	tbl[bar] = pci_iomap(pdev, bar, maxlen);
+	return tbl[bar];
+}
+EXPORT_SYMBOL(pcim_iomap);
+
+/**
+ * pcim_iounmap - Managed pci_iounmap()
+ * @pdev: PCI device to iounmap for
+ * @addr: Address to unmap
+ *
+ * Managed pci_iounmap().  @addr must have been mapped using pcim_iomap().
+ */
+void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+	void __iomem **tbl;
+	int i;
+
+	pci_iounmap(pdev, addr);
+
+	tbl = (void __iomem **)pcim_iomap_table(pdev);
+	BUG_ON(!tbl);
+
+	for (i = 0; i < PCIM_IOMAP_MAX; i++)
+		if (tbl[i] == addr) {
+			tbl[i] = NULL;
+			return;
+		}
+	WARN_ON(1);
+}
+EXPORT_SYMBOL(pcim_iounmap);
+
+/**
+ * pcim_iomap_regions - Request and iomap PCI BARs
+ * @pdev: PCI device to map IO resources for
+ * @mask: Mask of BARs to request and iomap
+ * @name: Name used when requesting regions
+ *
+ * Request and iomap regions specified by @mask.  Returns 0 on success;
+ * on failure every BAR set up by this call is unmapped and released
+ * again and -ENOMEM, -EINVAL or the pci_request_region() error is returned.
+ */
+int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+{
+	void __iomem * const *iomap;
+	int i, rc;
+
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+		if (!(mask & (1 << i)))
+			continue;
+
+		rc = -EINVAL;
+		if (!pci_resource_len(pdev, i))
+			goto err_inval;
+
+		rc = pci_request_region(pdev, i, name);
+		if (rc)
+			goto err_inval;
+
+		rc = -ENOMEM;
+		if (!pcim_iomap(pdev, i, 0))
+			goto err_region;
+	}
+
+	return 0;
+
+ err_region:
+	pci_release_region(pdev, i);
+ err_inval:
+	while (--i >= 0) {
+		/* only undo the BARs this call actually set up */
+		if (!(mask & (1 << i)))
+			continue;
+		pcim_iounmap(pdev, iomap[i]);
+		pci_release_region(pdev, i);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(pcim_iomap_regions);
+#endif
+#endif
diff --git a/lib/iomap.c b/lib/iomap.c
index 4990c736bc4..4d43f37c015 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -6,7 +6,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 
-#ifdef CONFIG_GENERIC_IOMAP
 #include <linux/module.h>
 
 /*
@@ -256,298 +255,3 @@ void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 }
 EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
-
-#endif /* CONFIG_GENERIC_IOMAP */
-
-/*
- * Generic iomap devres
- */
-static void devm_ioport_map_release(struct device *dev, void *res)
-{
-	ioport_unmap(*(void __iomem **)res);
-}
-
-static int devm_ioport_map_match(struct device *dev, void *res,
-				 void *match_data)
-{
-	return *(void **)res == match_data;
-}
-
-/**
- * devm_ioport_map - Managed ioport_map()
- * @dev: Generic device to map ioport for
- * @port: Port to map
- * @nr: Number of ports to map
- *
- * Managed ioport_map().  Map is automatically unmapped on driver
- * detach.
- */
-void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
-			       unsigned int nr)
-{
-	void __iomem **ptr, *addr;
-
-	ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	addr = ioport_map(port, nr);
-	if (addr) {
-		*ptr = addr;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return addr;
-}
-EXPORT_SYMBOL(devm_ioport_map);
-
-/**
- * devm_ioport_unmap - Managed ioport_unmap()
- * @dev: Generic device to unmap for
- * @addr: Address to unmap
- *
- * Managed ioport_unmap().  @addr must have been mapped using
- * devm_ioport_map().
- */
-void devm_ioport_unmap(struct device *dev, void __iomem *addr)
-{
-	ioport_unmap(addr);
-	WARN_ON(devres_destroy(dev, devm_ioport_map_release,
-			       devm_ioport_map_match, (void *)addr));
-}
-EXPORT_SYMBOL(devm_ioport_unmap);
-
-static void devm_ioremap_release(struct device *dev, void *res)
-{
-	iounmap(*(void __iomem **)res);
-}
-
-static int devm_ioremap_match(struct device *dev, void *res, void *match_data)
-{
-	return *(void **)res == match_data;
-}
-
-/**
- * devm_ioremap - Managed ioremap()
- * @dev: Generic device to remap IO address for
- * @offset: BUS offset to map
- * @size: Size of map
- *
- * Managed ioremap().  Map is automatically unmapped on driver detach.
- */
-void __iomem *devm_ioremap(struct device *dev, unsigned long offset,
-			   unsigned long size)
-{
-	void __iomem **ptr, *addr;
-
-	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	addr = ioremap(offset, size);
-	if (addr) {
-		*ptr = addr;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return addr;
-}
-EXPORT_SYMBOL(devm_ioremap);
-
-/**
- * devm_ioremap_nocache - Managed ioremap_nocache()
- * @dev: Generic device to remap IO address for
- * @offset: BUS offset to map
- * @size: Size of map
- *
- * Managed ioremap_nocache().  Map is automatically unmapped on driver
- * detach.
- */
-void __iomem *devm_ioremap_nocache(struct device *dev, unsigned long offset,
-				   unsigned long size)
-{
-	void __iomem **ptr, *addr;
-
-	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	addr = ioremap_nocache(offset, size);
-	if (addr) {
-		*ptr = addr;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return addr;
-}
-EXPORT_SYMBOL(devm_ioremap_nocache);
-
-/**
- * devm_iounmap - Managed iounmap()
- * @dev: Generic device to unmap for
- * @addr: Address to unmap
- *
- * Managed iounmap().  @addr must have been mapped using devm_ioremap*().
- */
-void devm_iounmap(struct device *dev, void __iomem *addr)
-{
-	iounmap(addr);
-	WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match,
-			       (void *)addr));
-}
-EXPORT_SYMBOL(devm_iounmap);
-
-/*
- * PCI iomap devres
- */
-#define PCIM_IOMAP_MAX	PCI_ROM_RESOURCE
-
-struct pcim_iomap_devres {
-	void __iomem *table[PCIM_IOMAP_MAX];
-};
-
-static void pcim_iomap_release(struct device *gendev, void *res)
-{
-	struct pci_dev *dev = container_of(gendev, struct pci_dev, dev);
-	struct pcim_iomap_devres *this = res;
-	int i;
-
-	for (i = 0; i < PCIM_IOMAP_MAX; i++)
-		if (this->table[i])
-			pci_iounmap(dev, this->table[i]);
-}
-
-/**
- * pcim_iomap_table - access iomap allocation table
- * @pdev: PCI device to access iomap table for
- *
- * Access iomap allocation table for @dev.  If iomap table doesn't
- * exist and @pdev is managed, it will be allocated.  All iomaps
- * recorded in the iomap table are automatically unmapped on driver
- * detach.
- *
- * This function might sleep when the table is first allocated but can
- * be safely called without context and guaranteed to succed once
- * allocated.
- */
-void __iomem * const * pcim_iomap_table(struct pci_dev *pdev)
-{
-	struct pcim_iomap_devres *dr, *new_dr;
-
-	dr = devres_find(&pdev->dev, pcim_iomap_release, NULL, NULL);
-	if (dr)
-		return dr->table;
-
-	new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL);
-	if (!new_dr)
-		return NULL;
-	dr = devres_get(&pdev->dev, new_dr, NULL, NULL);
-	return dr->table;
-}
-EXPORT_SYMBOL(pcim_iomap_table);
-
-/**
- * pcim_iomap - Managed pcim_iomap()
- * @pdev: PCI device to iomap for
- * @bar: BAR to iomap
- * @maxlen: Maximum length of iomap
- *
- * Managed pci_iomap().  Map is automatically unmapped on driver
- * detach.
- */
-void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
-{
-	void __iomem **tbl;
-
-	BUG_ON(bar >= PCIM_IOMAP_MAX);
-
-	tbl = (void __iomem **)pcim_iomap_table(pdev);
-	if (!tbl || tbl[bar])	/* duplicate mappings not allowed */
-		return NULL;
-
-	tbl[bar] = pci_iomap(pdev, bar, maxlen);
-	return tbl[bar];
-}
-EXPORT_SYMBOL(pcim_iomap);
-
-/**
- * pcim_iounmap - Managed pci_iounmap()
- * @pdev: PCI device to iounmap for
- * @addr: Address to unmap
- *
- * Managed pci_iounmap().  @addr must have been mapped using pcim_iomap().
- */
-void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr)
-{
-	void __iomem **tbl;
-	int i;
-
-	pci_iounmap(pdev, addr);
-
-	tbl = (void __iomem **)pcim_iomap_table(pdev);
-	BUG_ON(!tbl);
-
-	for (i = 0; i < PCIM_IOMAP_MAX; i++)
-		if (tbl[i] == addr) {
-			tbl[i] = NULL;
-			return;
-		}
-	WARN_ON(1);
-}
-EXPORT_SYMBOL(pcim_iounmap);
-
-/**
- * pcim_iomap_regions - Request and iomap PCI BARs
- * @pdev: PCI device to map IO resources for
- * @mask: Mask of BARs to request and iomap
- * @name: Name used when requesting regions
- *
- * Request and iomap regions specified by @mask.
- */
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
-{
-	void __iomem * const *iomap;
-	int i, rc;
-
-	iomap = pcim_iomap_table(pdev);
-	if (!iomap)
-		return -ENOMEM;
-
-	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-		unsigned long len;
-
-		if (!(mask & (1 << i)))
-			continue;
-
-		rc = -EINVAL;
-		len = pci_resource_len(pdev, i);
-		if (!len)
-			goto err_inval;
-
-		rc = pci_request_region(pdev, i, name);
-		if (rc)
-			goto err_region;
-
-		rc = -ENOMEM;
-		if (!pcim_iomap(pdev, i, 0))
-			goto err_iomap;
-	}
-
-	return 0;
-
- err_iomap:
-	pcim_iounmap(pdev, iomap[i]);
- err_region:
-	pci_release_region(pdev, i);
- err_inval:
-	while (--i >= 0) {
-		pcim_iounmap(pdev, iomap[i]);
-		pci_release_region(pdev, i);
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL(pcim_iomap_regions);
-- 
cgit v1.2.3-70-g09d2


From a304e1b82808904c561b7b149b467e338c53fcce Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 12 Feb 2007 00:52:00 -0800
Subject: [PATCH] Debug shared irqs

Drivers registering IRQ handlers with SA_SHIRQ really ought to be able to
handle an interrupt happening before request_irq() returns.  They also
ought to be able to handle an interrupt happening during the start of their
call to free_irq().  Let's test that hypothesis....

[bunk@stusta.de: Kconfig fixes]
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 33 +++++++++++++++++++++++++++++++++
 lib/Kconfig.debug   |  9 +++++++++
 2 files changed, 42 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8b961adc3bd..400b12a6364 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -357,6 +357,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	struct irq_desc *desc;
 	struct irqaction **p;
 	unsigned long flags;
+	irqreturn_t (*handler)(int, void *) = NULL;
 
 	WARN_ON(in_interrupt());
 	if (irq >= NR_IRQS)
@@ -396,6 +397,8 @@ void free_irq(unsigned int irq, void *dev_id)
 
 			/* Make sure it's not being used on another CPU */
 			synchronize_irq(irq);
+			if (action->flags & IRQF_SHARED)
+				handler = action->handler;
 			kfree(action);
 			return;
 		}
@@ -403,6 +406,17 @@ void free_irq(unsigned int irq, void *dev_id)
 		spin_unlock_irqrestore(&desc->lock, flags);
 		return;
 	}
+#ifdef CONFIG_DEBUG_SHIRQ
+	if (handler) {
+		/*
+		 * It's a shared IRQ -- the driver ought to be prepared for it
+		 * to happen even now it's being freed, so let's make sure....
+		 * We do this after actually deregistering it, to make sure that
+		 * a 'real' IRQ doesn't run in parallel with our fake
+		 */
+		handler(irq, dev_id);
+	}
+#endif
 }
 EXPORT_SYMBOL(free_irq);
 
@@ -475,6 +489,25 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 
 	select_smp_affinity(irq);
 
+#ifdef CONFIG_DEBUG_SHIRQ
+	if (irqflags & IRQF_SHARED) {
+		/*
+		 * It's a shared IRQ -- the driver ought to be prepared for it
+		 * to happen immediately, so let's make sure....
+		 * We do this before actually registering it, to make sure that
+		 * a 'real' IRQ doesn't run in parallel with our fake
+		 */
+		if (irqflags & IRQF_DISABLED) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			handler(irq, dev_id);
+			local_irq_restore(flags);
+		} else
+			handler(irq, dev_id);
+	}
+#endif
+
 	retval = setup_irq(irq, action);
 	if (retval)
 		kfree(action);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 356a5ab8279..63f04c15e6f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -77,6 +77,15 @@ config DEBUG_KERNEL
 	  Say Y here if you are developing drivers or trying to debug and
 	  identify kernel problems.
 
+config DEBUG_SHIRQ
+	bool "Debug shared IRQ handlers"
+	depends on DEBUG_KERNEL && GENERIC_HARDIRQS
+	help
+	  Enable this to generate a spurious interrupt just before a shared
+	  interrupt handler is registered, and just after one is deregistered.
+	  Drivers ought to be able to handle interrupts coming in at those
+	  points; some don't and need to be caught.
+
 config LOG_BUF_SHIFT
 	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
 	range 12 21
-- 
cgit v1.2.3-70-g09d2


From 3f0504471536a2b6978b9a99ed1c222950fff07a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 12 Feb 2007 00:52:04 -0800
Subject: [PATCH] kernel: shut up the IRQ mismatch messages

The problem is various drivers legally validly and sensibly try to claim
IRQs but the kernel insists on vomiting forth a giant irrelevant debugging
spew when the types clash.

Edit kernel/irq/manage.c go down to mismatch: in setup_irq() and ifdef out
the if clause that checks for mismatches.  It'll then just do the right
thing and work sanely.

For the current -mm kernel this will do the trick (and moves it into shared
irq debugging as in debug mode the info spew is useful).  I've had a
variant of this in my private tree for some time as I got fed up on the
mess on boxes where old legacy IRQs get reused.

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 400b12a6364..7c85d69188e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -328,12 +328,14 @@ int setup_irq(unsigned int irq, struct irqaction *new)
 	return 0;
 
 mismatch:
+#ifdef CONFIG_DEBUG_SHIRQ
 	if (!(new->flags & IRQF_PROBE_SHARED)) {
 		printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
 		if (old_name)
 			printk(KERN_ERR "current handler: %s\n", old_name);
 		dump_stack();
 	}
+#endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return -EBUSY;
 }
-- 
cgit v1.2.3-70-g09d2


From 944be0b224724fcbf63c3a3fe3a5478c325a6547 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 12 Feb 2007 00:52:26 -0800
Subject: [PATCH] close_files(): add scheduling point

close_files() can sometimes take long enough to trigger the soft lockup
detector.

Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index bc71fdfcd8a..14f17033f56 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -430,8 +430,10 @@ static void close_files(struct files_struct * files)
 		while (set) {
 			if (set & 1) {
 				struct file * file = xchg(&fdt->fd[i], NULL);
-				if (file)
+				if (file) {
 					filp_close(file, files);
+					cond_resched();
+				}
 			}
 			i++;
 			set >>= 1;
-- 
cgit v1.2.3-70-g09d2


From 8d42db189ca99703f0f4f91c477cb54808c8eaaa Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:52:55 -0800
Subject: [PATCH] signal: rewrite kill_something_info so it uses newer helpers

The goal is to remove users of the old signal helper functions so they can be
removed.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 228fdb5c01d..de66def7164 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1191,8 +1191,10 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
 
 static int kill_something_info(int sig, struct siginfo *info, int pid)
 {
+	int ret;
+	rcu_read_lock();
 	if (!pid) {
-		return kill_pg_info(sig, info, process_group(current));
+		ret = kill_pgrp_info(sig, info, task_pgrp(current));
 	} else if (pid == -1) {
 		int retval = 0, count = 0;
 		struct task_struct * p;
@@ -1207,12 +1209,14 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
 			}
 		}
 		read_unlock(&tasklist_lock);
-		return count ? retval : -ESRCH;
+		ret = count ? retval : -ESRCH;
 	} else if (pid < 0) {
-		return kill_pg_info(sig, info, -pid);
+		ret = kill_pgrp_info(sig, info, find_pid(-pid));
 	} else {
-		return kill_proc_info(sig, info, pid);
+		ret = kill_pid_info(sig, info, find_pid(pid));
 	}
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 04a2e6a5cbf84e85fe86de0a18f6509b147e1d89 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:52:56 -0800
Subject: [PATCH] pid: make session_of_pgrp use struct pid instead of pid_t

To properly implement a pid namespace I need to deal exclusively in terms of
struct pid, because pid_t values become ambiguous.

To this end session_of_pgrp is transformed to take and return a struct pid
pointer.  To avoid the need to worry about reference counting I now require my
caller to hold the appropriate locks, leaving callers responsible for
increasing the reference count if they need access to the result outside of
the locks.

Since session_of_pgrp currently only has one caller and that caller simply
tests the result for equality with another process group, the locking
change means I don't actually have to acquire the tasklist_lock at all.

tiocspgrp is also modified to take and release the lock.  The logic there is a
little more complicated but nothing I won't need when I convert pgrp of a tty
to a struct pid pointer.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c  | 24 +++++++++++++++++-------
 include/linux/kernel.h |  3 ++-
 kernel/exit.c          | 16 +++++++---------
 3 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index abfe24d28c5..95f3596189c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2990,7 +2990,8 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 
 static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
 {
-	pid_t pgrp;
+	struct pid *pgrp;
+	pid_t pgrp_nr;
 	int retval = tty_check_change(real_tty);
 
 	if (retval == -EIO)
@@ -3001,14 +3002,23 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	    (current->signal->tty != real_tty) ||
 	    (real_tty->session != process_session(current)))
 		return -ENOTTY;
-	if (get_user(pgrp, p))
+	if (get_user(pgrp_nr, p))
 		return -EFAULT;
-	if (pgrp < 0)
+	if (pgrp_nr < 0)
 		return -EINVAL;
-	if (session_of_pgrp(pgrp) != process_session(current))
-		return -EPERM;
-	real_tty->pgrp = pgrp;
-	return 0;
+	rcu_read_lock();
+	pgrp = find_pid(pgrp_nr);
+	retval = -ESRCH;
+	if (!pgrp)
+		goto out_unlock;
+	retval = -EPERM;
+	if (session_of_pgrp(pgrp) != task_session(current))
+		goto out_unlock;
+	retval = 0;
+	real_tty->pgrp = pgrp_nr;
+out_unlock:
+	rcu_read_unlock();
+	return retval;
 }
 
 /**
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3531764318f..9ddf25c2153 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -134,7 +134,8 @@ extern unsigned long long memparse(char *ptr, char **retptr);
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
-extern int session_of_pgrp(int pgrp);
+struct pid;
+extern struct pid *session_of_pgrp(struct pid *pgrp);
 
 extern void dump_thread(struct pt_regs *regs, struct user *dump);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 14f17033f56..3ac6a7a6f85 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -185,21 +185,19 @@ repeat:
  * This checks not only the pgrp, but falls back on the pid if no
  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
  * without this...
+ *
+ * The caller must hold rcu lock or the tasklist lock.
  */
-int session_of_pgrp(int pgrp)
+struct pid *session_of_pgrp(struct pid *pgrp)
 {
 	struct task_struct *p;
-	int sid = 0;
-
-	read_lock(&tasklist_lock);
+	struct pid *sid = NULL;
 
-	p = find_task_by_pid_type(PIDTYPE_PGID, pgrp);
+	p = pid_task(pgrp, PIDTYPE_PGID);
 	if (p == NULL)
-		p = find_task_by_pid(pgrp);
+		p = pid_task(pgrp, PIDTYPE_PID);
 	if (p != NULL)
-		sid = process_session(p);
-
-	read_unlock(&tasklist_lock);
+		sid = task_session(p);
 
 	return sid;
 }
-- 
cgit v1.2.3-70-g09d2


From 0475ac0845f9295bc5f69af45f58dff2c104c8d1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:52:57 -0800
Subject: [PATCH] pid: use struct pid for talking about process groups in exit.c

Modify has_stopped_jobs and will_become_orphaned_pgrp to use struct pid based
process groups.  This reduces the number of hash table lookups and paves
the way for multiple pid spaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 3ac6a7a6f85..407b80aaefd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -210,22 +210,22 @@ struct pid *session_of_pgrp(struct pid *pgrp)
  *
  * "I ask you, have you ever known what it is to be an orphan?"
  */
-static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
+static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 {
 	struct task_struct *p;
 	int ret = 1;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
 				|| is_init(p->real_parent))
 			continue;
-		if (process_group(p->real_parent) != pgrp &&
-		    process_session(p->real_parent) == process_session(p)) {
+		if (task_pgrp(p->real_parent) != pgrp &&
+		    task_session(p->real_parent) == task_session(p)) {
 			ret = 0;
 			break;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -234,23 +234,23 @@ int is_orphaned_pgrp(int pgrp)
 	int retval;
 
 	read_lock(&tasklist_lock);
-	retval = will_become_orphaned_pgrp(pgrp, NULL);
+	retval = will_become_orphaned_pgrp(find_pid(pgrp), NULL);
 	read_unlock(&tasklist_lock);
 
 	return retval;
 }
 
-static int has_stopped_jobs(int pgrp)
+static int has_stopped_jobs(struct pid *pgrp)
 {
 	int retval = 0;
 	struct task_struct *p;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 		retval = 1;
 		break;
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -648,14 +648,14 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	 * than we are, and it was the only connection
 	 * outside, so the child pgrp is now orphaned.
 	 */
-	if ((process_group(p) != process_group(father)) &&
-	    (process_session(p) == process_session(father))) {
-		int pgrp = process_group(p);
+	if ((task_pgrp(p) != task_pgrp(father)) &&
+	    (task_session(p) == task_session(father))) {
+		struct pid *pgrp = task_pgrp(p);
 
 		if (will_become_orphaned_pgrp(pgrp, NULL) &&
 		    has_stopped_jobs(pgrp)) {
-			__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
-			__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
+			__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
+			__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 		}
 	}
 }
@@ -735,6 +735,7 @@ static void exit_notify(struct task_struct *tsk)
 	int state;
 	struct task_struct *t;
 	struct list_head ptrace_dead, *_p, *_n;
+	struct pid *pgrp;
 
 	if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
 	    && !thread_group_empty(tsk)) {
@@ -787,12 +788,13 @@ static void exit_notify(struct task_struct *tsk)
 	 
 	t = tsk->real_parent;
 	
-	if ((process_group(t) != process_group(tsk)) &&
-	    (process_session(t) == process_session(tsk)) &&
-	    will_become_orphaned_pgrp(process_group(tsk), tsk) &&
-	    has_stopped_jobs(process_group(tsk))) {
-		__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
-		__kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk));
+	pgrp = task_pgrp(tsk);
+	if ((task_pgrp(t) != pgrp) &&
+	    (task_session(t) == task_session(tsk)) &&
+	    will_become_orphaned_pgrp(pgrp, tsk) &&
+	    has_stopped_jobs(pgrp)) {
+		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
+		__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 	}
 
 	/* Let father know we died 
-- 
cgit v1.2.3-70-g09d2


From 3e7cd6c413c9e6fbb5e1ee2acdadb4ababd2d474 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:52:58 -0800
Subject: [PATCH] pid: replace is_orphaned_pgrp with is_current_pgrp_orphaned

Every call to is_orphaned_pgrp passed in process_group(current), which is racy
with respect to another thread changing our process group.  It didn't bite us
because we were dealing with integers and the worst we would get would be a
stale answer.

In switching the checks to use struct pid to be a little more efficient and
prepare the way for pid namespaces this race became apparent.

So I simplified the calls to the more specialized is_current_pgrp_orphaned so
I didn't have to worry about making logic changes to avoid the race.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/n_tty.c  | 2 +-
 drivers/char/tty_io.c | 2 +-
 include/linux/tty.h   | 2 +-
 kernel/exit.c         | 4 ++--
 kernel/signal.c       | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 2bdb0144a22..c035c2f1f46 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -1188,7 +1188,7 @@ static int job_control(struct tty_struct *tty, struct file *file)
 			printk("read_chan: tty->pgrp <= 0!\n");
 		else if (process_group(current) != tty->pgrp) {
 			if (is_ignored(SIGTTIN) ||
-			    is_orphaned_pgrp(process_group(current)))
+			    is_current_pgrp_orphaned())
 				return -EIO;
 			kill_pg(process_group(current), SIGTTIN, 1);
 			return -ERESTARTSYS;
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 95f3596189c..94070f7bf38 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1118,7 +1118,7 @@ int tty_check_change(struct tty_struct * tty)
 		return 0;
 	if (is_ignored(SIGTTOU))
 		return 0;
-	if (is_orphaned_pgrp(process_group(current)))
+	if (is_current_pgrp_orphaned())
 		return -EIO;
 	(void) kill_pg(process_group(current), SIGTTOU, 1);
 	return -ERESTARTSYS;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 0a10a4e7bbc..d0e03c4a71b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -303,7 +303,7 @@ extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp,
 			     int buflen);
 extern void tty_write_message(struct tty_struct *tty, char *msg);
 
-extern int is_orphaned_pgrp(int pgrp);
+extern int is_current_pgrp_orphaned(void);
 extern int is_ignored(int sig);
 extern int tty_signal(int sig, struct tty_struct *tty);
 extern void tty_hangup(struct tty_struct * tty);
diff --git a/kernel/exit.c b/kernel/exit.c
index 407b80aaefd..f132349c032 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -229,12 +229,12 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
 	return ret;	/* (sighing) "Often!" */
 }
 
-int is_orphaned_pgrp(int pgrp)
+int is_current_pgrp_orphaned(void)
 {
 	int retval;
 
 	read_lock(&tasklist_lock);
-	retval = will_become_orphaned_pgrp(find_pid(pgrp), NULL);
+	retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
 	read_unlock(&tasklist_lock);
 
 	return retval;
diff --git a/kernel/signal.c b/kernel/signal.c
index de66def7164..a9b679ed795 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1909,7 +1909,7 @@ relock:
 
 				/* signals can be posted during this window */
 
-				if (is_orphaned_pgrp(process_group(current)))
+				if (is_current_pgrp_orphaned())
 					goto relock;
 
 				spin_lock_irq(&current->sighand->siglock);
-- 
cgit v1.2.3-70-g09d2


From ab521dc0f8e117fd808d3e425216864d60390500 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:53:00 -0800
Subject: [PATCH] tty: update the tty layer to work with struct pid

Of the kernel subsystems that work with pids, the tty layer is probably the
largest consumer.  But it has the nice virtue that the association with a
session only lasts until the session leader exits.  Which means that no
reference counting is required.  So using struct pid winds up being a simple
optimization to avoid hash table lookups.

In the long term the use of pid_nr also ensures that when we have multiple pid
spaces mixed everything will work correctly.

Signed-off-by: Eric W. Biederman <eric@maxwell.lnxi.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/drivers/line.c     |   2 +-
 drivers/char/ip2/ip2main.c |   4 +-
 drivers/char/n_tty.c       |  12 ++---
 drivers/char/tty_io.c      | 130 +++++++++++++++++++++++++++------------------
 drivers/char/vt.c          |   4 +-
 fs/proc/array.c            |   2 +-
 include/linux/init_task.h  |   2 +-
 include/linux/sched.h      |   2 +-
 include/linux/tty.h        |   4 +-
 kernel/fork.c              |   2 +-
 kernel/sys.c               |   1 -
 11 files changed, 96 insertions(+), 69 deletions(-)

(limited to 'kernel')

diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 0e1e9a20a4d..01d4ab6b0ef 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -774,7 +774,7 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 		line = tty->driver_data;
 		chan_window_size(&line->chan_list, &tty->winsize.ws_row,
 				 &tty->winsize.ws_col);
-		kill_pg(tty->pgrp, SIGWINCH, 1);
+		kill_pgrp(tty->pgrp, SIGWINCH, 1);
 	}
  out:
 	if(winch->fd != -1)
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 7c70310a49b..83c7258d358 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1271,8 +1271,8 @@ static void do_input(struct work_struct *work)
 // code duplicated from n_tty (ldisc)
 static inline void  isig(int sig, struct tty_struct *tty, int flush)
 {
-	if (tty->pgrp > 0)
-		kill_pg(tty->pgrp, sig, 1);
+	if (tty->pgrp)
+		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
 		if ( tty->ldisc.flush_buffer )  
 			tty->ldisc.flush_buffer(tty);
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index c035c2f1f46..6ac3ca4c723 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -579,8 +579,8 @@ static void eraser(unsigned char c, struct tty_struct *tty)
  
 static inline void isig(int sig, struct tty_struct *tty, int flush)
 {
-	if (tty->pgrp > 0)
-		kill_pg(tty->pgrp, sig, 1);
+	if (tty->pgrp)
+		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
 		n_tty_flush_buffer(tty);
 		if (tty->driver->flush_buffer)
@@ -1184,13 +1184,13 @@ static int job_control(struct tty_struct *tty, struct file *file)
 	/* don't stop on /dev/console */
 	if (file->f_op->write != redirected_tty_write &&
 	    current->signal->tty == tty) {
-		if (tty->pgrp <= 0)
-			printk("read_chan: tty->pgrp <= 0!\n");
-		else if (process_group(current) != tty->pgrp) {
+		if (!tty->pgrp)
+			printk("read_chan: no tty->pgrp!\n");
+		else if (task_pgrp(current) != tty->pgrp) {
 			if (is_ignored(SIGTTIN) ||
 			    is_current_pgrp_orphaned())
 				return -EIO;
-			kill_pg(process_group(current), SIGTTIN, 1);
+			kill_pgrp(task_pgrp(current), SIGTTIN, 1);
 			return -ERESTARTSYS;
 		}
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 94070f7bf38..65672c57470 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -155,7 +155,8 @@ int tty_ioctl(struct inode * inode, struct file * file,
 	      unsigned int cmd, unsigned long arg);
 static int tty_fasync(int fd, struct file * filp, int on);
 static void release_tty(struct tty_struct *tty, int idx);
-static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+static struct pid *__proc_set_tty(struct task_struct *tsk,
+				struct tty_struct *tty);
 
 /**
  *	alloc_tty_struct	-	allocate a tty object
@@ -1110,17 +1111,17 @@ int tty_check_change(struct tty_struct * tty)
 {
 	if (current->signal->tty != tty)
 		return 0;
-	if (tty->pgrp <= 0) {
-		printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+	if (!tty->pgrp) {
+		printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n");
 		return 0;
 	}
-	if (process_group(current) == tty->pgrp)
+	if (task_pgrp(current) == tty->pgrp)
 		return 0;
 	if (is_ignored(SIGTTOU))
 		return 0;
 	if (is_current_pgrp_orphaned())
 		return -EIO;
-	(void) kill_pg(process_group(current), SIGTTOU, 1);
+	(void) kill_pgrp(task_pgrp(current), SIGTTOU, 1);
 	return -ERESTARTSYS;
 }
 
@@ -1355,8 +1356,8 @@ static void do_tty_hangup(struct work_struct *work)
 	  tty_release is called */
 	
 	read_lock(&tasklist_lock);
-	if (tty->session > 0) {
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+	if (tty->session) {
+		do_each_pid_task(tty->session, PIDTYPE_SID, p) {
 			spin_lock_irq(&p->sighand->siglock);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
@@ -1366,16 +1367,17 @@ static void do_tty_hangup(struct work_struct *work)
 			}
 			__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
 			__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
-			if (tty->pgrp > 0)
-				p->signal->tty_old_pgrp = tty->pgrp;
+			put_pid(p->signal->tty_old_pgrp);  /* A noop */
+			if (tty->pgrp)
+				p->signal->tty_old_pgrp = get_pid(tty->pgrp);
 			spin_unlock_irq(&p->sighand->siglock);
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		} while_each_pid_task(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
 
 	tty->flags = 0;
-	tty->session = 0;
-	tty->pgrp = -1;
+	tty->session = NULL;
+	tty->pgrp = NULL;
 	tty->ctrl_status = 0;
 	/*
 	 *	If one of the devices matches a console pointer, we
@@ -1460,12 +1462,12 @@ int tty_hung_up_p(struct file * filp)
 
 EXPORT_SYMBOL(tty_hung_up_p);
 
-static void session_clear_tty(pid_t session)
+static void session_clear_tty(struct pid *session)
 {
 	struct task_struct *p;
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_pid_task(session, PIDTYPE_SID, p) {
 		proc_clear_tty(p);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_pid_task(session, PIDTYPE_SID, p);
 }
 
 /**
@@ -1495,48 +1497,54 @@ static void session_clear_tty(pid_t session)
 void disassociate_ctty(int on_exit)
 {
 	struct tty_struct *tty;
-	int tty_pgrp = -1;
+	struct pid *tty_pgrp = NULL;
 
 	lock_kernel();
 
 	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
 	if (tty) {
-		tty_pgrp = tty->pgrp;
+		tty_pgrp = get_pid(tty->pgrp);
 		mutex_unlock(&tty_mutex);
 		/* XXX: here we race, there is nothing protecting tty */
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 	} else if (on_exit) {
-		pid_t old_pgrp;
+		struct pid *old_pgrp;
 		spin_lock_irq(&current->sighand->siglock);
 		old_pgrp = current->signal->tty_old_pgrp;
-		current->signal->tty_old_pgrp = 0;
+		current->signal->tty_old_pgrp = NULL;
 		spin_unlock_irq(&current->sighand->siglock);
 		if (old_pgrp) {
-			kill_pg(old_pgrp, SIGHUP, on_exit);
-			kill_pg(old_pgrp, SIGCONT, on_exit);
+			kill_pgrp(old_pgrp, SIGHUP, on_exit);
+			kill_pgrp(old_pgrp, SIGCONT, on_exit);
+			put_pid(old_pgrp);
 		}
 		mutex_unlock(&tty_mutex);
 		unlock_kernel();	
 		return;
 	}
-	if (tty_pgrp > 0) {
-		kill_pg(tty_pgrp, SIGHUP, on_exit);
+	if (tty_pgrp) {
+		kill_pgrp(tty_pgrp, SIGHUP, on_exit);
 		if (!on_exit)
-			kill_pg(tty_pgrp, SIGCONT, on_exit);
+			kill_pgrp(tty_pgrp, SIGCONT, on_exit);
+		put_pid(tty_pgrp);
 	}
 
 	spin_lock_irq(&current->sighand->siglock);
+	tty_pgrp = current->signal->tty_old_pgrp;
 	current->signal->tty_old_pgrp = 0;
 	spin_unlock_irq(&current->sighand->siglock);
+	put_pid(tty_pgrp);
 
 	mutex_lock(&tty_mutex);
 	/* It is possible that do_tty_hangup has free'd this tty */
 	tty = get_current_tty();
 	if (tty) {
-		tty->session = 0;
-		tty->pgrp = 0;
+		put_pid(tty->session);
+		put_pid(tty->pgrp);
+		tty->session = NULL;
+		tty->pgrp = NULL;
 	} else {
 #ifdef TTY_DEBUG_HANGUP
 		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
@@ -1547,7 +1555,7 @@ void disassociate_ctty(int on_exit)
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	session_clear_tty(process_session(current));
+	session_clear_tty(task_session(current));
 	read_unlock(&tasklist_lock);
 	unlock_kernel();
 }
@@ -2484,6 +2492,7 @@ static int tty_open(struct inode * inode, struct file * filp)
 	int index;
 	dev_t device = inode->i_rdev;
 	unsigned short saved_flags = filp->f_flags;
+	struct pid *old_pgrp;
 
 	nonseekable_open(inode, filp);
 	
@@ -2577,15 +2586,17 @@ got_driver:
 		goto retry_open;
 	}
 
+	old_pgrp = NULL;
 	mutex_lock(&tty_mutex);
 	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
 	    !current->signal->tty &&
-	    tty->session == 0)
-		__proc_set_tty(current, tty);
+	    tty->session == NULL)
+		old_pgrp = __proc_set_tty(current, tty);
 	spin_unlock_irq(&current->sighand->siglock);
 	mutex_unlock(&tty_mutex);
+	put_pid(old_pgrp);
 	return 0;
 }
 
@@ -2724,9 +2735,18 @@ static int tty_fasync(int fd, struct file * filp, int on)
 		return retval;
 
 	if (on) {
+		enum pid_type type;
+		struct pid *pid;
 		if (!waitqueue_active(&tty->read_wait))
 			tty->minimum_to_wake = 1;
-		retval = f_setown(filp, (-tty->pgrp) ? : current->pid, 0);
+		if (tty->pgrp) {
+			pid = tty->pgrp;
+			type = PIDTYPE_PGID;
+		} else {
+			pid = task_pid(current);
+			type = PIDTYPE_PID;
+		}
+		retval = __f_setown(filp, pid, type, 0);
 		if (retval)
 			return retval;
 	} else {
@@ -2828,10 +2848,10 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 		}
 	}
 #endif
-	if (tty->pgrp > 0)
-		kill_pg(tty->pgrp, SIGWINCH, 1);
-	if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
-		kill_pg(real_tty->pgrp, SIGWINCH, 1);
+	if (tty->pgrp)
+		kill_pgrp(tty->pgrp, SIGWINCH, 1);
+	if ((real_tty->pgrp != tty->pgrp) && real_tty->pgrp)
+		kill_pgrp(real_tty->pgrp, SIGWINCH, 1);
 	tty->winsize = tmp_ws;
 	real_tty->winsize = tmp_ws;
 done:
@@ -2916,8 +2936,7 @@ static int fionbio(struct file *file, int __user *p)
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
 	int ret = 0;
-	if (current->signal->leader &&
-			(process_session(current) == tty->session))
+	if (current->signal->leader && (task_session(current) == tty->session))
 		return ret;
 
 	mutex_lock(&tty_mutex);
@@ -2930,7 +2949,7 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 		goto unlock;
 	}
 
-	if (tty->session > 0) {
+	if (tty->session) {
 		/*
 		 * This tty is already the controlling
 		 * tty for another session group!
@@ -2973,7 +2992,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(real_tty->pgrp, p);
+	return put_user(pid_nr(real_tty->pgrp), p);
 }
 
 /**
@@ -3000,7 +3019,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 		return retval;
 	if (!current->signal->tty ||
 	    (current->signal->tty != real_tty) ||
-	    (real_tty->session != process_session(current)))
+	    (real_tty->session != task_session(current)))
 		return -ENOTTY;
 	if (get_user(pgrp_nr, p))
 		return -EFAULT;
@@ -3015,7 +3034,8 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	if (session_of_pgrp(pgrp) != task_session(current))
 		goto out_unlock;
 	retval = 0;
-	real_tty->pgrp = pgrp_nr;
+	put_pid(real_tty->pgrp);
+	real_tty->pgrp = get_pid(pgrp);
 out_unlock:
 	rcu_read_unlock();
 	return retval;
@@ -3041,9 +3061,9 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
 	*/
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	if (real_tty->session <= 0)
+	if (!real_tty->session)
 		return -ENOTTY;
-	return put_user(real_tty->session, p);
+	return put_user(pid_nr(real_tty->session), p);
 }
 
 /**
@@ -3343,7 +3363,7 @@ void __do_SAK(struct tty_struct *tty)
 	tty_hangup(tty);
 #else
 	struct task_struct *g, *p;
-	int session;
+	struct pid *session;
 	int		i;
 	struct file	*filp;
 	struct fdtable *fdt;
@@ -3359,12 +3379,12 @@ void __do_SAK(struct tty_struct *tty)
 	
 	read_lock(&tasklist_lock);
 	/* Kill the entire session */
-	do_each_task_pid(session, PIDTYPE_SID, p) {
+	do_each_pid_task(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
 			" (%s): process_session(p)==tty->session\n",
 			p->pid, p->comm);
 		send_sig(SIGKILL, p, 1);
-	} while_each_task_pid(session, PIDTYPE_SID, p);
+	} while_each_pid_task(session, PIDTYPE_SID, p);
 	/* Now kill any processes that happen to have the
 	 * tty open.
 	 */
@@ -3533,7 +3553,8 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	memset(tty, 0, sizeof(struct tty_struct));
 	tty->magic = TTY_MAGIC;
 	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
-	tty->pgrp = -1;
+	tty->session = NULL;
+	tty->pgrp = NULL;
 	tty->overrun_time = jiffies;
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
@@ -3804,21 +3825,28 @@ void proc_clear_tty(struct task_struct *p)
 }
 EXPORT_SYMBOL(proc_clear_tty);
 
-static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+static struct pid *__proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
 {
+	struct pid *old_pgrp;
 	if (tty) {
-		tty->session = process_session(tsk);
-		tty->pgrp = process_group(tsk);
+		tty->session = get_pid(task_session(tsk));
+		tty->pgrp = get_pid(task_pgrp(tsk));
 	}
+	old_pgrp = tsk->signal->tty_old_pgrp;
 	tsk->signal->tty = tty;
-	tsk->signal->tty_old_pgrp = 0;
+	tsk->signal->tty_old_pgrp = NULL;
+	return old_pgrp;
 }
 
 void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
 {
+	struct pid *old_pgrp;
+
 	spin_lock_irq(&tsk->sighand->siglock);
-	__proc_set_tty(tsk, tty);
+	old_pgrp = __proc_set_tty(tsk, tty);
 	spin_unlock_irq(&tsk->sighand->siglock);
+
+	put_pid(old_pgrp);
 }
 
 struct tty_struct *get_current_tty(void)
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index d6694163b6f..94ce3e7fc9e 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -866,8 +866,8 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
 		ws.ws_col = vc->vc_cols;
 		ws.ws_ypixel = vc->vc_scan_lines;
 		if ((ws.ws_row != cws->ws_row || ws.ws_col != cws->ws_col) &&
-		    vc->vc_tty->pgrp > 0)
-			kill_pg(vc->vc_tty->pgrp, SIGWINCH, 1);
+		    vc->vc_tty->pgrp)
+			kill_pgrp(vc->vc_tty->pgrp, SIGWINCH, 1);
 		*cws = ws;
 	}
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab117b..07c9cdbcdca 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -351,7 +351,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		struct signal_struct *sig = task->signal;
 
 		if (sig->tty) {
-			tty_pgrp = sig->tty->pgrp;
+			tty_pgrp = pid_nr(sig->tty->pgrp);
 			tty_nr = new_encode_dev(tty_devnum(sig->tty));
 		}
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6383d2d83bb..a2d95ff50e9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -66,7 +66,7 @@
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
 	.pgrp		= 1,						\
-	.tty_old_pgrp   = 0,						\
+	.tty_old_pgrp   = NULL,						\
 	{ .__session      = 1},						\
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 76c8e2dc48d..39d40c51853 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -436,7 +436,7 @@ struct signal_struct {
 
 	/* job control IDs */
 	pid_t pgrp;
-	pid_t tty_old_pgrp;
+	struct pid *tty_old_pgrp;
 
 	union {
 		pid_t session __deprecated;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d0e03c4a71b..dee72b9a20f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -197,8 +197,8 @@ struct tty_struct {
 	struct mutex termios_mutex;
 	struct ktermios *termios, *termios_locked;
 	char name[64];
-	int pgrp;
-	int session;
+	struct pid *pgrp;
+	struct pid *session;
 	unsigned long flags;
 	int count;
 	struct winsize winsize;
diff --git a/kernel/fork.c b/kernel/fork.c
index 80284eb488c..0b6293d94d9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -869,7 +869,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	sig->it_prof_incr = cputime_zero;
 
 	sig->leader = 0;	/* session leadership doesn't inherit */
-	sig->tty_old_pgrp = 0;
+	sig->tty_old_pgrp = NULL;
 
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index e1024383314..efcf76e0dad 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1510,7 +1510,6 @@ asmlinkage long sys_setsid(void)
 
 	spin_lock(&group_leader->sighand->siglock);
 	group_leader->signal->tty = NULL;
-	group_leader->signal->tty_old_pgrp = 0;
 	spin_unlock(&group_leader->sighand->siglock);
 
 	err = process_group(group_leader);
-- 
cgit v1.2.3-70-g09d2


From 41487c65bfcce9c8e4d123da1719fcfd8df6d4d0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:53:01 -0800
Subject: [PATCH] pid: replace do/while_each_task_pid with
 do/while_each_pid_task

There isn't any real advantage to this change except that it allows the old
functions to be removed.  Which is easier on maintenance and puts the code in
a more uniform style.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ioprio.c         | 18 ++++++++++++------
 kernel/capability.c |  8 +++++---
 kernel/sys.c        | 40 ++++++++++++++++++++++++----------------
 3 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'kernel')

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 89e8da112a7..10d2c211d18 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -60,6 +60,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 	int data = IOPRIO_PRIO_DATA(ioprio);
 	struct task_struct *p, *g;
 	struct user_struct *user;
+	struct pid *pgrp;
 	int ret;
 
 	switch (class) {
@@ -98,12 +99,14 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
-				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				pgrp = task_pgrp(current);
+			else
+				pgrp = find_pid(who);
+			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
 					break;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
@@ -167,6 +170,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 {
 	struct task_struct *g, *p;
 	struct user_struct *user;
+	struct pid *pgrp;
 	int ret = -ESRCH;
 	int tmpio;
 
@@ -182,8 +186,10 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
-				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				pgrp = task_pgrp(current);
+			else
+				pgrp = find_pid(who);
+			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
 					continue;
@@ -191,7 +197,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 					ret = tmpio;
 				else
 					ret = ioprio_best(ret, tmpio);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
diff --git a/kernel/capability.c b/kernel/capability.c
index edb845a6e84..c8d3c776203 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -92,15 +92,17 @@ out:
  * cap_set_pg - set capabilities for all processes in a given process
  * group.  We call this holding task_capability_lock and tasklist_lock.
  */
-static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
 			      kernel_cap_t *inheritable,
 			      kernel_cap_t *permitted)
 {
 	struct task_struct *g, *target;
 	int ret = -EPERM;
 	int found = 0;
+	struct pid *pgrp;
 
-	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
+	pgrp = find_pid(pgrp_nr);
+	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
 		target = g;
 		while_each_thread(g, target) {
 			if (!security_capset_check(target, effective,
@@ -113,7 +115,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
 			}
 			found = 1;
 		}
-	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+	} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
 
 	if (!found)
 	     ret = 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index efcf76e0dad..123b165080e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -596,6 +596,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 	struct task_struct *g, *p;
 	struct user_struct *user;
 	int error = -EINVAL;
+	struct pid *pgrp;
 
 	if (which > 2 || which < 0)
 		goto out;
@@ -610,18 +611,21 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 	read_lock(&tasklist_lock);
 	switch (which) {
 		case PRIO_PROCESS:
-			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+			if (who)
+				p = find_task_by_pid(who);
+			else
+				p = current;
 			if (p)
 				error = set_one_prio(p, niceval, error);
 			break;
 		case PRIO_PGRP:
-			if (!who)
-				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			if (who)
+				pgrp = find_pid(who);
+			else
+				pgrp = task_pgrp(current);
+			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -656,6 +660,7 @@ asmlinkage long sys_getpriority(int which, int who)
 	struct task_struct *g, *p;
 	struct user_struct *user;
 	long niceval, retval = -ESRCH;
+	struct pid *pgrp;
 
 	if (which > 2 || which < 0)
 		return -EINVAL;
@@ -663,9 +668,10 @@ asmlinkage long sys_getpriority(int which, int who)
 	read_lock(&tasklist_lock);
 	switch (which) {
 		case PRIO_PROCESS:
-			if (!who)
-				who = current->pid;
-			p = find_task_by_pid(who);
+			if (who)
+				p = find_task_by_pid(who);
+			else
+				p = current;
 			if (p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
@@ -673,13 +679,15 @@ asmlinkage long sys_getpriority(int which, int who)
 			}
 			break;
 		case PRIO_PGRP:
-			if (!who)
-				who = process_group(current);
-			do_each_task_pid(who, PIDTYPE_PGID, p) {
+			if (who)
+				pgrp = find_pid(who);
+			else
+				pgrp = task_pgrp(current);
+			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			user = current->user;
@@ -1388,7 +1396,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 
 	if (p->real_parent == group_leader) {
 		err = -EPERM;
-		if (process_session(p) != process_session(group_leader))
+		if (task_session(p) != task_session(group_leader))
 			goto out;
 		err = -EACCES;
 		if (p->did_exec)
@@ -1407,7 +1415,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		struct task_struct *g =
 			find_task_by_pid_type(PIDTYPE_PGID, pgid);
 
-		if (!g || process_session(g) != process_session(group_leader))
+		if (!g || task_session(g) != task_session(group_leader))
 			goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 27b0b2f44adffe0193a695bb528a83b550b8e54b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 12 Feb 2007 00:53:02 -0800
Subject: [PATCH] pid: remove the now unused kill_pg kill_pg_info and
 __kill_pg_info

Now that I have changed all of the in-tree users, remove the old versions of
these functions.  This should make it clear to any out-of-tree users that they
should be using kill_pgrp, kill_pgrp_info or __kill_pgrp_info instead.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  3 ---
 kernel/signal.c       | 27 ---------------------------
 2 files changed, 30 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 39d40c51853..5053dc01fad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1329,14 +1329,11 @@ extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
 extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
-extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
-extern int kill_pg_info(int, struct siginfo *, pid_t);
 extern void do_notify_parent(struct task_struct *, int);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
 extern void zap_other_threads(struct task_struct *p);
-extern int kill_pg(pid_t, int, int);
 extern int kill_proc(pid_t, int, int);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
diff --git a/kernel/signal.c b/kernel/signal.c
index a9b679ed795..8072e568bbe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1096,26 +1096,6 @@ int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 	return retval;
 }
 
-int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
-{
-	if (pgrp <= 0)
-		return -EINVAL;
-
-	return __kill_pgrp_info(sig, info, find_pid(pgrp));
-}
-
-int
-kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
-{
-	int retval;
-
-	read_lock(&tasklist_lock);
-	retval = __kill_pg_info(sig, info, pgrp);
-	read_unlock(&tasklist_lock);
-
-	return retval;
-}
-
 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 {
 	int error;
@@ -1314,12 +1294,6 @@ int kill_pid(struct pid *pid, int sig, int priv)
 }
 EXPORT_SYMBOL(kill_pid);
 
-int
-kill_pg(pid_t pgrp, int sig, int priv)
-{
-	return kill_pg_info(sig, __si_special(priv), pgrp);
-}
-
 int
 kill_proc(pid_t pid, int sig, int priv)
 {
@@ -1959,7 +1933,6 @@ EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
 EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_pg);
 EXPORT_SYMBOL(kill_proc);
 EXPORT_SYMBOL(ptrace_notify);
 EXPORT_SYMBOL(send_sig);
-- 
cgit v1.2.3-70-g09d2


From ff91691bccdb741efb2df0489058a4961fa79598 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 12 Feb 2007 00:53:51 -0800
Subject: [PATCH] sched: avoid div in rebalance_tick

Avoid expensive integer divide 3 times per CPU per tick.

A userspace test of this loop went from 26ns, down to 19ns on a G5; and
from 123ns down to 28ns on a P3.

(Also avoid a variable bit shift, as suggested by Alan. The effect
of this wasn't noticeable on the CPUs I tested with).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 1fd67e16cd3..08f86178aa3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2897,14 +2897,16 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 static void update_load(struct rq *this_rq)
 {
 	unsigned long this_load;
-	int i, scale;
+	unsigned int i, scale;
 
 	this_load = this_rq->raw_weighted_load;
 
 	/* Update our load: */
-	for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
+	for (i = 0, scale = 1; i < 3; i++, scale += scale) {
 		unsigned long old_load, new_load;
 
+		/* scale is effectively 1 << i now, and >> i divides by scale */
+
 		old_load = this_rq->cpu_load[i];
 		new_load = this_load;
 		/*
@@ -2914,7 +2916,7 @@ static void update_load(struct rq *this_rq)
 		 */
 		if (new_load > old_load)
 			new_load += scale-1;
-		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
+		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9a32144e9d7b4e21341174b1a83b82a82353be86 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 12 Feb 2007 00:55:35 -0800
Subject: [PATCH] mark struct file_operations const 7

Many struct file_operations in the kernel can be "const".  Marking them const
moves these to the .rodata section, which avoids false sharing with potential
dirty data.  In addition it'll catch accidental writes at compile time to
these shared resources.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c                                          |  4 ++--
 ipc/shm.c                                             |  4 ++--
 ipc/util.c                                            |  4 ++--
 kernel/cpuset.c                                       |  2 +-
 net/802/tr.c                                          |  2 +-
 net/8021q/vlanproc.c                                  |  4 ++--
 net/appletalk/aarp.c                                  |  2 +-
 net/appletalk/atalk_proc.c                            |  6 +++---
 net/atm/br2684.c                                      |  2 +-
 net/atm/clip.c                                        |  2 +-
 net/atm/lec.c                                         |  2 +-
 net/atm/mpoa_proc.c                                   |  2 +-
 net/atm/proc.c                                        | 12 ++++++------
 net/ax25/af_ax25.c                                    |  2 +-
 net/ax25/ax25_route.c                                 |  2 +-
 net/ax25/ax25_uid.c                                   |  2 +-
 net/core/dev.c                                        |  4 ++--
 net/core/dev_mcast.c                                  |  2 +-
 net/core/neighbour.c                                  |  4 ++--
 net/core/pktgen.c                                     |  6 +++---
 net/core/sock.c                                       |  2 +-
 net/core/wireless.c                                   |  2 +-
 net/dccp/probe.c                                      |  2 +-
 net/decnet/af_decnet.c                                |  2 +-
 net/decnet/dn_dev.c                                   |  2 +-
 net/decnet/dn_neigh.c                                 |  2 +-
 net/decnet/dn_route.c                                 |  2 +-
 net/ipv4/arp.c                                        |  2 +-
 net/ipv4/fib_hash.c                                   |  2 +-
 net/ipv4/fib_trie.c                                   |  6 +++---
 net/ipv4/igmp.c                                       |  4 ++--
 net/ipv4/ipconfig.c                                   |  2 +-
 net/ipv4/ipmr.c                                       |  4 ++--
 net/ipv4/ipvs/ip_vs_app.c                             |  2 +-
 net/ipv4/ipvs/ip_vs_conn.c                            |  2 +-
 net/ipv4/ipvs/ip_vs_ctl.c                             |  4 ++--
 net/ipv4/netfilter/ip_conntrack_standalone.c          |  6 +++---
 net/ipv4/netfilter/ipt_CLUSTERIP.c                    |  4 ++--
 net/ipv4/netfilter/ipt_recent.c                       |  4 ++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c |  6 +++---
 net/ipv4/proc.c                                       |  6 +++---
 net/ipv4/raw.c                                        |  2 +-
 net/ipv4/route.c                                      |  4 ++--
 net/ipv4/tcp_probe.c                                  |  2 +-
 net/ipv6/addrconf.c                                   |  2 +-
 net/ipv6/anycast.c                                    |  2 +-
 net/ipv6/ip6_flowlabel.c                              |  2 +-
 net/ipv6/mcast.c                                      |  4 ++--
 net/ipv6/proc.c                                       |  4 ++--
 net/ipv6/raw.c                                        |  2 +-
 net/ipv6/route.c                                      |  2 +-
 net/ipx/ipx_proc.c                                    |  6 +++---
 net/irda/discovery.c                                  |  2 +-
 53 files changed, 86 insertions(+), 86 deletions(-)

(limited to 'kernel')

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 02717f71d8d..fafdef357e9 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -85,7 +85,7 @@ struct mqueue_inode_info {
 };
 
 static struct inode_operations mqueue_dir_inode_operations;
-static struct file_operations mqueue_file_operations;
+static const struct file_operations mqueue_file_operations;
 static struct super_operations mqueue_super_ops;
 static void remove_notification(struct mqueue_inode_info *info);
 
@@ -1166,7 +1166,7 @@ static struct inode_operations mqueue_dir_inode_operations = {
 	.unlink = mqueue_unlink,
 };
 
-static struct file_operations mqueue_file_operations = {
+static const struct file_operations mqueue_file_operations = {
 	.flush = mqueue_flush_file,
 	.poll = mqueue_poll_file,
 	.read = mqueue_read_file,
diff --git a/ipc/shm.c b/ipc/shm.c
index f8e10a25ad7..5bb617f6306 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -42,7 +42,7 @@
 
 #include "util.h"
 
-static struct file_operations shm_file_operations;
+static const struct file_operations shm_file_operations;
 static struct vm_operations_struct shm_vm_ops;
 
 static struct ipc_ids init_shm_ids;
@@ -249,7 +249,7 @@ static int shm_release(struct inode *ino, struct file *file)
 	return 0;
 }
 
-static struct file_operations shm_file_operations = {
+static const struct file_operations shm_file_operations = {
 	.mmap		= shm_mmap,
 	.release	= shm_release,
 #ifndef CONFIG_MMU
diff --git a/ipc/util.c b/ipc/util.c
index 115e9aac136..08a647965b9 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -205,7 +205,7 @@ void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
 }
 
 #ifdef CONFIG_PROC_FS
-static struct file_operations sysvipc_proc_fops;
+static const struct file_operations sysvipc_proc_fops;
 /**
  *	ipc_init_proc_interface	-  Create a proc interface for sysipc types using a seq_file interface.
  *	@path: Path in procfs
@@ -879,7 +879,7 @@ static int sysvipc_proc_release(struct inode *inode, struct file *file)
 	return seq_release_private(inode, file);
 }
 
-static struct file_operations sysvipc_proc_fops = {
+static const struct file_operations sysvipc_proc_fops = {
 	.open    = sysvipc_proc_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6b05dc69c95..232aed2b10f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2656,7 +2656,7 @@ static int cpuset_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_cpuset_show, pid);
 }
 
-struct file_operations proc_cpuset_operations = {
+const struct file_operations proc_cpuset_operations = {
 	.open		= cpuset_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/net/802/tr.c b/net/802/tr.c
index 31509f61340..96bd14452c5 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -576,7 +576,7 @@ static int rif_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &rif_seq_ops);
 }
 
-static struct file_operations rif_seq_fops = {
+static const struct file_operations rif_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = rif_seq_open,
 	.read    = seq_read,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 1b72c9854d6..5e24f72602a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -81,7 +81,7 @@ static int vlan_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &vlan_seq_ops);
 }
 
-static struct file_operations vlan_fops = {
+static const struct file_operations vlan_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = vlan_seq_open,
 	.read    = seq_read,
@@ -98,7 +98,7 @@ static int vlandev_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, vlandev_seq_show, PDE(inode)->data);
 }
 
-static struct file_operations vlandev_fops = {
+static const struct file_operations vlandev_fops = {
 	.owner = THIS_MODULE,
 	.open    = vlandev_seq_open,
 	.read    = seq_read,
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 27e845d260a..d89d62f3702 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -1048,7 +1048,7 @@ out_kfree:
 	goto out;
 }
 
-struct file_operations atalk_seq_arp_fops = {
+const struct file_operations atalk_seq_arp_fops = {
 	.owner		= THIS_MODULE,
 	.open           = aarp_seq_open,
 	.read           = seq_read,
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index bc3015f277b..57ff8122b5c 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -240,7 +240,7 @@ static int atalk_seq_socket_open(struct inode *inode, struct file *file)
 	return seq_open(file, &atalk_seq_socket_ops);
 }
 
-static struct file_operations atalk_seq_interface_fops = {
+static const struct file_operations atalk_seq_interface_fops = {
 	.owner		= THIS_MODULE,
 	.open		= atalk_seq_interface_open,
 	.read		= seq_read,
@@ -248,7 +248,7 @@ static struct file_operations atalk_seq_interface_fops = {
 	.release	= seq_release,
 };
 
-static struct file_operations atalk_seq_route_fops = {
+static const struct file_operations atalk_seq_route_fops = {
 	.owner		= THIS_MODULE,
 	.open		= atalk_seq_route_open,
 	.read		= seq_read,
@@ -256,7 +256,7 @@ static struct file_operations atalk_seq_route_fops = {
 	.release	= seq_release,
 };
 
-static struct file_operations atalk_seq_socket_fops = {
+static const struct file_operations atalk_seq_socket_fops = {
 	.owner		= THIS_MODULE,
 	.open		= atalk_seq_socket_open,
 	.read		= seq_read,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index f949b5c74ec..ec4ebd3299e 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -784,7 +784,7 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
 	return seq_open(file, &br2684_seq_ops);
 }
 
-static struct file_operations br2684_proc_ops = {
+static const struct file_operations br2684_proc_ops = {
 	.owner   = THIS_MODULE,
 	.open    = br2684_proc_open,
 	.read    = seq_read,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 5f8a1d22272..ebb5d0ce8b6 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -971,7 +971,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations arp_seq_fops = {
+static const struct file_operations arp_seq_fops = {
 	.open		= arp_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 57dc2ab1b65..98694552769 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1212,7 +1212,7 @@ static int lec_seq_release(struct inode *inode, struct file *file)
 	return seq_release_private(inode, file);
 }
 
-static struct file_operations lec_seq_fops = {
+static const struct file_operations lec_seq_fops = {
 	.owner = THIS_MODULE,
 	.open = lec_seq_open,
 	.read = seq_read,
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 43315af1030..4b05cbec7a5 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -39,7 +39,7 @@ static int parse_qos(const char *buff);
 /*
  *   Define allowed FILE OPERATIONS
  */
-static struct file_operations mpc_file_operations = {
+static const struct file_operations mpc_file_operations = {
 	.owner =	THIS_MODULE,
 	.open =		proc_mpc_open,
 	.read =		seq_read,
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 190f49ce2ca..9e61e512f66 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -33,7 +33,7 @@
 static ssize_t proc_dev_atm_read(struct file *file,char __user *buf,size_t count,
     loff_t *pos);
 
-static struct file_operations proc_atm_dev_ops = {
+static const struct file_operations proc_atm_dev_ops = {
 	.owner =	THIS_MODULE,
 	.read =		proc_dev_atm_read,
 };
@@ -272,7 +272,7 @@ static int atm_dev_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &atm_dev_seq_ops);
 }
 
-static struct file_operations devices_seq_fops = {
+static const struct file_operations devices_seq_fops = {
 	.open		= atm_dev_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -307,7 +307,7 @@ static int pvc_seq_open(struct inode *inode, struct file *file)
 	return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops);
 }
 
-static struct file_operations pvc_seq_fops = {
+static const struct file_operations pvc_seq_fops = {
 	.open		= pvc_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -341,7 +341,7 @@ static int vcc_seq_open(struct inode *inode, struct file *file)
 	return __vcc_seq_open(inode, file, 0, &vcc_seq_ops);
 }
 
-static struct file_operations vcc_seq_fops = {
+static const struct file_operations vcc_seq_fops = {
 	.open		= vcc_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -376,7 +376,7 @@ static int svc_seq_open(struct inode *inode, struct file *file)
 	return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops);
 }
 
-static struct file_operations svc_seq_fops = {
+static const struct file_operations svc_seq_fops = {
 	.open		= svc_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -457,7 +457,7 @@ void atm_proc_dev_deregister(struct atm_dev *dev)
 
 static struct atm_proc_entry {
 	char *name;
-	struct file_operations *proc_fops;
+	const struct file_operations *proc_fops;
 	struct proc_dir_entry *dirent;
 } atm_proc_ents[] = {
 	{ .name = "devices",	.proc_fops = &devices_seq_fops },
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9a0b677d1e7..1c07c6a50eb 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1938,7 +1938,7 @@ static int ax25_info_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ax25_info_seqops);
 }
 
-static struct file_operations ax25_info_fops = {
+static const struct file_operations ax25_info_fops = {
 	.owner = THIS_MODULE,
 	.open = ax25_info_open,
 	.read = seq_read,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7078861a738..d65b8e22868 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -332,7 +332,7 @@ static int ax25_rt_info_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ax25_rt_seqops);
 }
 
-struct file_operations ax25_route_fops = {
+const struct file_operations ax25_route_fops = {
 	.owner = THIS_MODULE,
 	.open = ax25_rt_info_open,
 	.read = seq_read,
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 7f4c294b36f..59a41b1e61f 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -198,7 +198,7 @@ static int ax25_uid_info_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ax25_uid_seqops);
 }
 
-struct file_operations ax25_uid_fops = {
+const struct file_operations ax25_uid_fops = {
 	.owner = THIS_MODULE,
 	.open = ax25_uid_info_open,
 	.read = seq_read,
diff --git a/net/core/dev.c b/net/core/dev.c
index 85d58d79932..cf71614dae9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2200,7 +2200,7 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &dev_seq_ops);
 }
 
-static struct file_operations dev_seq_fops = {
+static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
 	.read    = seq_read,
@@ -2220,7 +2220,7 @@ static int softnet_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &softnet_seq_ops);
 }
 
-static struct file_operations softnet_seq_fops = {
+static const struct file_operations softnet_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = softnet_seq_open,
 	.read    = seq_read,
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index bfcbdf73a29..c4e754e86e9 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -277,7 +277,7 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &dev_mc_seq_ops);
 }
 
-static struct file_operations dev_mc_seq_fops = {
+static const struct file_operations dev_mc_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_mc_seq_open,
 	.read    = seq_read,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c08d6965056..512eed91785 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -63,7 +63,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 
 static struct neigh_table *neigh_tables;
 #ifdef CONFIG_PROC_FS
-static struct file_operations neigh_stat_seq_fops;
+static const struct file_operations neigh_stat_seq_fops;
 #endif
 
 /*
@@ -2403,7 +2403,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
 	return ret;
 };
 
-static struct file_operations neigh_stat_seq_fops = {
+static const struct file_operations neigh_stat_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open 	 = neigh_stat_seq_open,
 	.read	 = seq_read,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c2818e07a4b..74a9a32b906 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -579,7 +579,7 @@ static int pgctrl_open(struct inode *inode, struct file *file)
 	return single_open(file, pgctrl_show, PDE(inode)->data);
 }
 
-static struct file_operations pktgen_fops = {
+static const struct file_operations pktgen_fops = {
 	.owner   = THIS_MODULE,
 	.open    = pgctrl_open,
 	.read    = seq_read,
@@ -1672,7 +1672,7 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
 	return single_open(file, pktgen_if_show, PDE(inode)->data);
 }
 
-static struct file_operations pktgen_if_fops = {
+static const struct file_operations pktgen_if_fops = {
 	.owner   = THIS_MODULE,
 	.open    = pktgen_if_open,
 	.read    = seq_read,
@@ -1815,7 +1815,7 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
 	return single_open(file, pktgen_thread_show, PDE(inode)->data);
 }
 
-static struct file_operations pktgen_thread_fops = {
+static const struct file_operations pktgen_thread_fops = {
 	.owner   = THIS_MODULE,
 	.open    = pktgen_thread_open,
 	.read    = seq_read,
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e35d9973f5..e9986acdd0a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1911,7 +1911,7 @@ static int proto_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &proto_seq_ops);
 }
 
-static struct file_operations proto_seq_fops = {
+static const struct file_operations proto_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open		= proto_seq_open,
 	.read		= seq_read,
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 64017d47b25..9936ab11e6e 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -674,7 +674,7 @@ static int wireless_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &wireless_seq_ops);
 }
 
-static struct file_operations wireless_seq_fops = {
+static const struct file_operations wireless_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = wireless_seq_open,
 	.read    = seq_read,
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index f81e37de35d..3b1f509f51d 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -149,7 +149,7 @@ out_free:
 	return error ? error : cnt;
 }
 
-static struct file_operations dccpprobe_fops = {
+static const struct file_operations dccpprobe_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = dccpprobe_open,
 	.read    = dccpprobe_read,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 70061641ee5..c6568d637e1 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2331,7 +2331,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations dn_socket_seq_fops = {
+static const struct file_operations dn_socket_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open		= dn_socket_seq_open,
 	.read		= seq_read,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c5e28114beb..2b2c7fe45a7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1442,7 +1442,7 @@ static int dn_dev_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &dn_dev_seq_ops);
 }
 
-static struct file_operations dn_dev_seq_fops = {
+static const struct file_operations dn_dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = dn_dev_seq_open,
 	.read	 = seq_read,
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 11d692dfb4f..bf701cf5a38 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -598,7 +598,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations dn_neigh_seq_fops = {
+static const struct file_operations dn_neigh_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open		= dn_neigh_seq_open,
 	.read		= seq_read,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index efccc42ff1c..c1b5502f195 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1751,7 +1751,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations dn_rt_cache_seq_fops = {
+static const struct file_operations dn_rt_cache_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = dn_rt_cache_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a58afde4f72..0ffd2d2920c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1390,7 +1390,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations arp_seq_fops = {
+static const struct file_operations arp_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open           = arp_seq_open,
 	.read           = seq_read,
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index dea04d725b0..b21bb28d1fd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1057,7 +1057,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations fib_seq_fops = {
+static const struct file_operations fib_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open           = fib_seq_open,
 	.read           = seq_read,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 004a437bd7b..c33dca07380 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2173,7 +2173,7 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, fib_triestat_seq_show, NULL);
 }
 
-static struct file_operations fib_triestat_fops = {
+static const struct file_operations fib_triestat_fops = {
 	.owner	= THIS_MODULE,
 	.open	= fib_triestat_seq_open,
 	.read	= seq_read,
@@ -2364,7 +2364,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations fib_trie_fops = {
+static const struct file_operations fib_trie_fops = {
 	.owner  = THIS_MODULE,
 	.open   = fib_trie_seq_open,
 	.read   = seq_read,
@@ -2485,7 +2485,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations fib_route_fops = {
+static const struct file_operations fib_route_fops = {
 	.owner  = THIS_MODULE,
 	.open   = fib_route_seq_open,
 	.read   = seq_read,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b8e1625d34c..063721302eb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2403,7 +2403,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations igmp_mc_seq_fops = {
+static const struct file_operations igmp_mc_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp_mc_seq_open,
 	.read		=	seq_read,
@@ -2577,7 +2577,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations igmp_mcf_seq_fops = {
+static const struct file_operations igmp_mcf_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp_mcf_seq_open,
 	.read		=	seq_read,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ba882bec317..cf49de1a498 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1200,7 +1200,7 @@ static int pnp_seq_open(struct inode *indoe, struct file *file)
 	return single_open(file, pnp_seq_show, NULL);
 }
 
-static struct file_operations pnp_seq_fops = {
+static const struct file_operations pnp_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open		= pnp_seq_open,
 	.read		= seq_read,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 604f5b58510..e6d11abd784 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1712,7 +1712,7 @@ out_kfree:
 
 }
 
-static struct file_operations ipmr_vif_fops = {
+static const struct file_operations ipmr_vif_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = ipmr_vif_open,
 	.read    = seq_read,
@@ -1874,7 +1874,7 @@ out_kfree:
 
 }
 
-static struct file_operations ipmr_mfc_fops = {
+static const struct file_operations ipmr_mfc_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = ipmr_mfc_open,
 	.read    = seq_read,
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 6c40899aa16..22e104c6a49 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -561,7 +561,7 @@ static int ip_vs_app_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ip_vs_app_seq_ops);
 }
 
-static struct file_operations ip_vs_app_fops = {
+static const struct file_operations ip_vs_app_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = ip_vs_app_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 0b5e03476ce..7018f97c75d 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -757,7 +757,7 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ip_vs_conn_seq_ops);
 }
 
-static struct file_operations ip_vs_conn_fops = {
+static const struct file_operations ip_vs_conn_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = ip_vs_conn_open,
 	.read    = seq_read,
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9b933381ebb..8b08d9cdcbc 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1812,7 +1812,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations ip_vs_info_fops = {
+static const struct file_operations ip_vs_info_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = ip_vs_info_open,
 	.read    = seq_read,
@@ -1859,7 +1859,7 @@ static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, ip_vs_stats_show, NULL);
 }
 
-static struct file_operations ip_vs_stats_fops = {
+static const struct file_operations ip_vs_stats_fops = {
 	.owner = THIS_MODULE,
 	.open = ip_vs_stats_seq_open,
 	.read = seq_read,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 5903588fddc..300ccbbbdac 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -222,7 +222,7 @@ out_free:
 	return ret;
 }
 
-static struct file_operations ct_file_ops = {
+static const struct file_operations ct_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = ct_open,
 	.read    = seq_read,
@@ -298,7 +298,7 @@ static int exp_open(struct inode *inode, struct file *file)
 	return seq_open(file, &exp_seq_ops);
 }
 
-static struct file_operations exp_file_ops = {
+static const struct file_operations exp_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = exp_open,
 	.read    = seq_read,
@@ -386,7 +386,7 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ct_cpu_seq_ops);
 }
 
-static struct file_operations ct_cpu_seq_fops = {
+static const struct file_operations ct_cpu_seq_fops = {
 	.owner   = THIS_MODULE,
 	.open    = ct_cpu_seq_open,
 	.read    = seq_read,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4fe28f26447..e965b333c99 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -70,7 +70,7 @@ static LIST_HEAD(clusterip_configs);
 static DEFINE_RWLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
-static struct file_operations clusterip_proc_fops;
+static const struct file_operations clusterip_proc_fops;
 static struct proc_dir_entry *clusterip_procdir;
 #endif
 
@@ -715,7 +715,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 	return size;
 }
 
-static struct file_operations clusterip_proc_fops = {
+static const struct file_operations clusterip_proc_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = clusterip_proc_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 6b97b679617..aecb9c48e15 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -79,7 +79,7 @@ static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry	*proc_dir;
-static struct file_operations	recent_fops;
+static const struct file_operations	recent_fops;
 #endif
 
 static u_int32_t hash_rnd;
@@ -454,7 +454,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
 	return size;
 }
 
-static struct file_operations recent_fops = {
+static const struct file_operations recent_fops = {
 	.open		= recent_seq_open,
 	.read		= seq_read,
 	.write		= recent_proc_write,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 14a93a73841..89f933e8103 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -197,7 +197,7 @@ out_free:
 	return ret;
 }
 
-static struct file_operations ct_file_ops = {
+static const struct file_operations ct_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = ct_open,
 	.read    = seq_read,
@@ -278,7 +278,7 @@ static int exp_open(struct inode *inode, struct file *file)
 	return seq_open(file, &exp_seq_ops);
 }
 
-static struct file_operations ip_exp_file_ops = {
+static const struct file_operations ip_exp_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = exp_open,
 	.read    = seq_read,
@@ -366,7 +366,7 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ct_cpu_seq_ops);
 }
 
-static struct file_operations ct_cpu_seq_fops = {
+static const struct file_operations ct_cpu_seq_fops = {
 	.owner   = THIS_MODULE,
 	.open    = ct_cpu_seq_open,
 	.read    = seq_read,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ccb199e9dd8..ae68a691e8c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -79,7 +79,7 @@ static int sockstat_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, sockstat_seq_show, NULL);
 }
 
-static struct file_operations sockstat_seq_fops = {
+static const struct file_operations sockstat_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = sockstat_seq_open,
 	.read	 = seq_read,
@@ -326,7 +326,7 @@ static int snmp_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, snmp_seq_show, NULL);
 }
 
-static struct file_operations snmp_seq_fops = {
+static const struct file_operations snmp_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = snmp_seq_open,
 	.read	 = seq_read,
@@ -360,7 +360,7 @@ static int netstat_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, netstat_seq_show, NULL);
 }
 
-static struct file_operations netstat_seq_fops = {
+static const struct file_operations netstat_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = netstat_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 931084bfb57..87e9c161810 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -916,7 +916,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations raw_seq_fops = {
+static const struct file_operations raw_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = raw_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b3834b38a2..9b5e56481d5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -393,7 +393,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations rt_cache_seq_fops = {
+static const struct file_operations rt_cache_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = rt_cache_seq_open,
 	.read	 = seq_read,
@@ -484,7 +484,7 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &rt_cpu_seq_ops);
 }
 
-static struct file_operations rt_cpu_seq_fops = {
+static const struct file_operations rt_cpu_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = rt_cpu_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 41c15784818..61f406f2729 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -143,7 +143,7 @@ out_free:
 	return error ? error : cnt;
 }
 
-static struct file_operations tcpprobe_fops = {
+static const struct file_operations tcpprobe_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = tcpprobe_open,
 	.read    = tcpprobe_read,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 77e56f2b1af..ea0755b0903 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2776,7 +2776,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations if6_fops = {
+static const struct file_operations if6_fops = {
 	.owner		= THIS_MODULE,
 	.open		= if6_seq_open,
 	.read		= seq_read,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index e2dc1c41bbf..6fb2e9d716c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -565,7 +565,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations ac6_seq_fops = {
+static const struct file_operations ac6_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	ac6_seq_open,
 	.read		=	seq_read,
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1551ab3890a..c206a152ed9 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -677,7 +677,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations ip6fl_seq_fops = {
+static const struct file_operations ip6fl_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	ip6fl_seq_open,
 	.read		=	seq_read,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c9db5bc5b0f..a8d6625ec78 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2451,7 +2451,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations igmp6_mc_seq_fops = {
+static const struct file_operations igmp6_mc_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mc_seq_open,
 	.read		=	seq_read,
@@ -2625,7 +2625,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations igmp6_mcf_seq_fops = {
+static const struct file_operations igmp6_mcf_seq_fops = {
 	.owner		=	THIS_MODULE,
 	.open		=	igmp6_mcf_seq_open,
 	.read		=	seq_read,
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 032ef95c5b0..c82257dd04b 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -187,7 +187,7 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, sockstat6_seq_show, NULL);
 }
 
-static struct file_operations sockstat6_seq_fops = {
+static const struct file_operations sockstat6_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = sockstat6_seq_open,
 	.read	 = seq_read,
@@ -200,7 +200,7 @@ static int snmp6_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, snmp6_seq_show, PDE(inode)->data);
 }
 
-static struct file_operations snmp6_seq_fops = {
+static const struct file_operations snmp6_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = snmp6_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 16d4c63ff55..1f8f6275a7e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1273,7 +1273,7 @@ out_kfree:
 	goto out;
 }
 
-static struct file_operations raw6_seq_fops = {
+static const struct file_operations raw6_seq_fops = {
 	.owner =	THIS_MODULE,
 	.open =		raw6_seq_open,
 	.read =		seq_read,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a415ac610e2..0e1f4b2cd3d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2345,7 +2345,7 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
 	return single_open(file, rt6_stats_seq_show, NULL);
 }
 
-static struct file_operations rt6_stats_seq_fops = {
+static const struct file_operations rt6_stats_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = rt6_stats_seq_open,
 	.read	 = seq_read,
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 811e4badce8..db32ac8e79b 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -322,7 +322,7 @@ static int ipx_seq_socket_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ipx_seq_socket_ops);
 }
 
-static struct file_operations ipx_seq_interface_fops = {
+static const struct file_operations ipx_seq_interface_fops = {
 	.owner		= THIS_MODULE,
 	.open           = ipx_seq_interface_open,
 	.read           = seq_read,
@@ -330,7 +330,7 @@ static struct file_operations ipx_seq_interface_fops = {
 	.release        = seq_release,
 };
 
-static struct file_operations ipx_seq_route_fops = {
+static const struct file_operations ipx_seq_route_fops = {
 	.owner		= THIS_MODULE,
 	.open           = ipx_seq_route_open,
 	.read           = seq_read,
@@ -338,7 +338,7 @@ static struct file_operations ipx_seq_route_fops = {
 	.release        = seq_release,
 };
 
-static struct file_operations ipx_seq_socket_fops = {
+static const struct file_operations ipx_seq_socket_fops = {
 	.owner		= THIS_MODULE,
 	.open           = ipx_seq_socket_open,
 	.read           = seq_read,
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index 789478bc300..f0973412867 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -409,7 +409,7 @@ static int discovery_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &discovery_seq_ops);
 }
 
-struct file_operations discovery_seq_fops = {
+const struct file_operations discovery_seq_fops = {
 	.owner		= THIS_MODULE,
 	.open           = discovery_seq_open,
 	.read           = seq_read,
-- 
cgit v1.2.3-70-g09d2


From 92e1d5be91a0e3ffa5c4697eeb09b2aa22792122 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 12 Feb 2007 00:55:39 -0800
Subject: [PATCH] mark struct inode_operations const 2

Many struct inode_operations in the kernel can be "const".  Marking them const
moves these to the .rodata section, which avoids false sharing with potential
dirty data.  In addition it'll catch accidental writes at compile time to
these shared resources.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/gfs2/ops_inode.c     |  8 ++++----
 fs/gfs2/ops_inode.h     |  8 ++++----
 fs/hfs/dir.c            |  2 +-
 fs/hfs/hfs_fs.h         |  2 +-
 fs/hfs/inode.c          |  4 ++--
 fs/hfsplus/dir.c        |  2 +-
 fs/hfsplus/inode.c      |  4 ++--
 fs/hostfs/hostfs_kern.c |  8 ++++----
 fs/hpfs/file.c          |  2 +-
 fs/hpfs/hpfs_fn.h       |  4 ++--
 fs/hpfs/namei.c         |  2 +-
 fs/hppfs/hppfs_kern.c   |  6 +++---
 fs/hugetlbfs/inode.c    |  8 ++++----
 fs/isofs/dir.c          |  2 +-
 fs/isofs/isofs.h        |  2 +-
 fs/jffs/inode-v23.c     |  8 ++++----
 fs/jffs2/dir.c          |  2 +-
 fs/jffs2/file.c         |  2 +-
 fs/jffs2/os-linux.h     |  6 +++---
 fs/jffs2/symlink.c      |  2 +-
 fs/jfs/file.c           |  2 +-
 fs/jfs/jfs_inode.h      |  6 +++---
 fs/jfs/namei.c          |  2 +-
 fs/jfs/symlink.c        |  2 +-
 fs/libfs.c              |  2 +-
 fs/minix/file.c         |  2 +-
 fs/minix/inode.c        |  2 +-
 fs/minix/minix.h        |  4 ++--
 fs/minix/namei.c        |  2 +-
 fs/msdos/namei.c        |  2 +-
 fs/namei.c              |  2 +-
 fs/ncpfs/dir.c          |  2 +-
 fs/ncpfs/file.c         |  2 +-
 fs/ncpfs/inode.c        |  2 +-
 fs/nfs/dir.c            |  6 +++---
 fs/nfs/file.c           |  4 ++--
 fs/nfs/namespace.c      |  4 ++--
 fs/nfs/nfs4_fs.h        |  2 +-
 fs/nfs/nfs4proc.c       |  2 +-
 fs/nfs/symlink.c        |  2 +-
 fs/ntfs/file.c          |  4 ++--
 fs/ntfs/namei.c         |  2 +-
 fs/ntfs/ntfs.h          |  6 +++---
 fs/ocfs2/dlm/dlmfs.c    | 12 ++++++------
 fs/ocfs2/file.c         |  4 ++--
 fs/ocfs2/file.h         |  4 ++--
 fs/ocfs2/namei.c        |  2 +-
 fs/ocfs2/namei.h        |  2 +-
 fs/ocfs2/symlink.c      |  4 ++--
 fs/ocfs2/symlink.h      |  4 ++--
 fs/openpromfs/inode.c   |  2 +-
 ipc/mqueue.c            |  4 ++--
 kernel/cpuset.c         |  2 +-
 mm/shmem.c              | 20 ++++++++++----------
 54 files changed, 105 insertions(+), 105 deletions(-)

(limited to 'kernel')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f40a84807d7..60f47bf2e8e 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1115,7 +1115,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
 }
 
-struct inode_operations gfs2_file_iops = {
+const struct inode_operations gfs2_file_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
@@ -1125,7 +1125,7 @@ struct inode_operations gfs2_file_iops = {
 	.removexattr = gfs2_removexattr,
 };
 
-struct inode_operations gfs2_dev_iops = {
+const struct inode_operations gfs2_dev_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
@@ -1135,7 +1135,7 @@ struct inode_operations gfs2_dev_iops = {
 	.removexattr = gfs2_removexattr,
 };
 
-struct inode_operations gfs2_dir_iops = {
+const struct inode_operations gfs2_dir_iops = {
 	.create = gfs2_create,
 	.lookup = gfs2_lookup,
 	.link = gfs2_link,
@@ -1154,7 +1154,7 @@ struct inode_operations gfs2_dir_iops = {
 	.removexattr = gfs2_removexattr,
 };
 
-struct inode_operations gfs2_symlink_iops = {
+const struct inode_operations gfs2_symlink_iops = {
 	.readlink = gfs2_readlink,
 	.follow_link = gfs2_follow_link,
 	.permission = gfs2_permission,
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index b15acb4fd34..34f0caac1a0 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -12,9 +12,9 @@
 
 #include <linux/fs.h>
 
-extern struct inode_operations gfs2_file_iops;
-extern struct inode_operations gfs2_dir_iops;
-extern struct inode_operations gfs2_symlink_iops;
-extern struct inode_operations gfs2_dev_iops;
+extern const struct inode_operations gfs2_file_iops;
+extern const struct inode_operations gfs2_dir_iops;
+extern const struct inode_operations gfs2_symlink_iops;
+extern const struct inode_operations gfs2_dev_iops;
 
 #endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index e2e0358da33..7c69b98a2e4 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -320,7 +320,7 @@ const struct file_operations hfs_dir_operations = {
 	.release	= hfs_dir_release,
 };
 
-struct inode_operations hfs_dir_inode_operations = {
+const struct inode_operations hfs_dir_inode_operations = {
 	.create		= hfs_create,
 	.lookup		= hfs_lookup,
 	.unlink		= hfs_unlink,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 735332dfd1b..147374b6f67 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -170,7 +170,7 @@ extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qst
 
 /* dir.c */
 extern const struct file_operations hfs_dir_operations;
-extern struct inode_operations hfs_dir_inode_operations;
+extern const struct inode_operations hfs_dir_inode_operations;
 
 /* extent.c */
 extern int hfs_ext_keycmp(const btree_key *, const btree_key *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 5cb7f8fee8d..fafcba59387 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -18,7 +18,7 @@
 #include "btree.h"
 
 static const struct file_operations hfs_file_operations;
-static struct inode_operations hfs_file_inode_operations;
+static const struct inode_operations hfs_file_inode_operations;
 
 /*================ Variable-like macros ================*/
 
@@ -612,7 +612,7 @@ static const struct file_operations hfs_file_operations = {
 	.release	= hfs_file_release,
 };
 
-static struct inode_operations hfs_file_inode_operations = {
+static const struct inode_operations hfs_file_inode_operations = {
 	.lookup		= hfs_file_lookup,
 	.truncate	= hfs_file_truncate,
 	.setattr	= hfs_inode_setattr,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e886ac8460d..78137007ccc 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -471,7 +471,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return res;
 }
 
-struct inode_operations hfsplus_dir_inode_operations = {
+const struct inode_operations hfsplus_dir_inode_operations = {
 	.lookup		= hfsplus_lookup,
 	.create		= hfsplus_create,
 	.link		= hfsplus_link,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 75e8c4d8aac..642012ac337 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -268,10 +268,10 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-extern struct inode_operations hfsplus_dir_inode_operations;
+extern const struct inode_operations hfsplus_dir_inode_operations;
 extern struct file_operations hfsplus_dir_operations;
 
-static struct inode_operations hfsplus_file_inode_operations = {
+static const struct inode_operations hfsplus_file_inode_operations = {
 	.lookup		= hfsplus_file_lookup,
 	.truncate	= hfsplus_file_truncate,
 	.permission	= hfsplus_permission,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 69a376f35a6..fec208db5e4 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -52,8 +52,8 @@ static int append = 0;
 
 #define HOSTFS_SUPER_MAGIC 0x00c0ffee
 
-static struct inode_operations hostfs_iops;
-static struct inode_operations hostfs_dir_iops;
+static const struct inode_operations hostfs_iops;
+static const struct inode_operations hostfs_dir_iops;
 static const struct address_space_operations hostfs_link_aops;
 
 #ifndef MODULE
@@ -880,7 +880,7 @@ int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return(0);
 }
 
-static struct inode_operations hostfs_iops = {
+static const struct inode_operations hostfs_iops = {
 	.create		= hostfs_create,
 	.link		= hostfs_link,
 	.unlink		= hostfs_unlink,
@@ -894,7 +894,7 @@ static struct inode_operations hostfs_iops = {
 	.getattr	= hostfs_getattr,
 };
 
-static struct inode_operations hostfs_dir_iops = {
+static const struct inode_operations hostfs_dir_iops = {
 	.create		= hostfs_create,
 	.lookup		= hostfs_lookup,
 	.link		= hostfs_link,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fb4c8915010..b4eafc0f1e5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -132,7 +132,7 @@ const struct file_operations hpfs_file_ops =
 	.sendfile	= generic_file_sendfile,
 };
 
-struct inode_operations hpfs_file_iops =
+const struct inode_operations hpfs_file_iops =
 {
 	.truncate	= hpfs_truncate,
 	.setattr	= hpfs_notify_change,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1c07aa82d32..42ff60ccf2a 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
 
 int hpfs_file_fsync(struct file *, struct dentry *, int);
 extern const struct file_operations hpfs_file_ops;
-extern struct inode_operations hpfs_file_iops;
+extern const struct inode_operations hpfs_file_iops;
 extern const struct address_space_operations hpfs_aops;
 
 /* inode.c */
@@ -302,7 +302,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
 
 /* namei.c */
 
-extern struct inode_operations hpfs_dir_iops;
+extern const struct inode_operations hpfs_dir_iops;
 extern const struct address_space_operations hpfs_symlink_aops;
 
 static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2507e7393f3..9953cf9a2f1 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -659,7 +659,7 @@ end1:
 	return err;
 }
 
-struct inode_operations hpfs_dir_iops =
+const struct inode_operations hpfs_dir_iops =
 {
 	.create		= hpfs_create,
 	.lookup		= hpfs_lookup,
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index afd340a45da..bd711681631 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -212,7 +212,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
 	return(ERR_PTR(err));
 }
 
-static struct inode_operations hppfs_file_iops = {
+static const struct inode_operations hppfs_file_iops = {
 };
 
 static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
@@ -693,11 +693,11 @@ static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return ret;
 }
 
-static struct inode_operations hppfs_dir_iops = {
+static const struct inode_operations hppfs_dir_iops = {
 	.lookup		= hppfs_lookup,
 };
 
-static struct inode_operations hppfs_link_iops = {
+static const struct inode_operations hppfs_link_iops = {
 	.readlink	= hppfs_readlink,
 	.follow_link	= hppfs_follow_link,
 };
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e6bd553fdc4..26ba81fc7fa 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -36,8 +36,8 @@
 static struct super_operations hugetlbfs_ops;
 static const struct address_space_operations hugetlbfs_aops;
 const struct file_operations hugetlbfs_file_operations;
-static struct inode_operations hugetlbfs_dir_inode_operations;
-static struct inode_operations hugetlbfs_inode_operations;
+static const struct inode_operations hugetlbfs_dir_inode_operations;
+static const struct inode_operations hugetlbfs_inode_operations;
 
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
@@ -563,7 +563,7 @@ const struct file_operations hugetlbfs_file_operations = {
 	.get_unmapped_area	= hugetlb_get_unmapped_area,
 };
 
-static struct inode_operations hugetlbfs_dir_inode_operations = {
+static const struct inode_operations hugetlbfs_dir_inode_operations = {
 	.create		= hugetlbfs_create,
 	.lookup		= simple_lookup,
 	.link		= simple_link,
@@ -576,7 +576,7 @@ static struct inode_operations hugetlbfs_dir_inode_operations = {
 	.setattr	= hugetlbfs_setattr,
 };
 
-static struct inode_operations hugetlbfs_inode_operations = {
+static const struct inode_operations hugetlbfs_inode_operations = {
 	.setattr	= hugetlbfs_setattr,
 };
 
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 4af2548f97a..0e94c31cad9 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -24,7 +24,7 @@ const struct file_operations isofs_dir_operations =
 /*
  * directories can handle most operations...
  */
-struct inode_operations isofs_dir_inode_operations =
+const struct inode_operations isofs_dir_inode_operations =
 {
 	.lookup		= isofs_lookup,
 };
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index e6308c8b573..efe2872cd4e 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -174,7 +174,7 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
 	}
 }
 
-extern struct inode_operations isofs_dir_inode_operations;
+extern const struct inode_operations isofs_dir_inode_operations;
 extern const struct file_operations isofs_dir_operations;
 extern const struct address_space_operations isofs_symlink_aops;
 extern struct export_operations isofs_export_ops;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 6ee20668850..fe3347defe6 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -56,9 +56,9 @@ static int jffs_remove(struct inode *dir, struct dentry *dentry, int type);
 
 static struct super_operations jffs_ops;
 static const struct file_operations jffs_file_operations;
-static struct inode_operations jffs_file_inode_operations;
+static const struct inode_operations jffs_file_inode_operations;
 static const struct file_operations jffs_dir_operations;
-static struct inode_operations jffs_dir_inode_operations;
+static const struct inode_operations jffs_dir_inode_operations;
 static const struct address_space_operations jffs_address_operations;
 
 struct kmem_cache     *node_cache = NULL;
@@ -1642,7 +1642,7 @@ static const struct file_operations jffs_file_operations =
 };
 
 
-static struct inode_operations jffs_file_inode_operations =
+static const struct inode_operations jffs_file_inode_operations =
 {
 	.lookup		= jffs_lookup,          /* lookup */
 	.setattr	= jffs_setattr,
@@ -1655,7 +1655,7 @@ static const struct file_operations jffs_dir_operations =
 };
 
 
-static struct inode_operations jffs_dir_inode_operations =
+static const struct inode_operations jffs_dir_inode_operations =
 {
 	.create		= jffs_create,
 	.lookup		= jffs_lookup,
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index da6034d5071..cdbe2fe14e2 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_dir_operations =
 };
 
 
-struct inode_operations jffs2_dir_inode_operations =
+const struct inode_operations jffs2_dir_inode_operations =
 {
 	.create =	jffs2_create,
 	.lookup =	jffs2_lookup,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 242875f77cb..e82eeaf7590 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -54,7 +54,7 @@ const struct file_operations jffs2_file_operations =
 
 /* jffs2_file_inode_operations */
 
-struct inode_operations jffs2_file_inode_operations =
+const struct inode_operations jffs2_file_inode_operations =
 {
 	.permission =	jffs2_permission,
 	.setattr =	jffs2_setattr,
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 9f41fc01a37..e07a0edcdb4 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -153,11 +153,11 @@ void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c);
 
 /* dir.c */
 extern const struct file_operations jffs2_dir_operations;
-extern struct inode_operations jffs2_dir_inode_operations;
+extern const struct inode_operations jffs2_dir_inode_operations;
 
 /* file.c */
 extern const struct file_operations jffs2_file_operations;
-extern struct inode_operations jffs2_file_inode_operations;
+extern const struct inode_operations jffs2_file_inode_operations;
 extern const struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, struct dentry *, int);
 int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
@@ -166,7 +166,7 @@ int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
 int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
 
 /* symlink.c */
-extern struct inode_operations jffs2_symlink_inode_operations;
+extern const struct inode_operations jffs2_symlink_inode_operations;
 
 /* fs.c */
 int jffs2_setattr (struct dentry *, struct iattr *);
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b90d5aa3d96..7e4882c8a7e 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -20,7 +20,7 @@
 
 static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
 
-struct inode_operations jffs2_symlink_inode_operations =
+const struct inode_operations jffs2_symlink_inode_operations =
 {
 	.readlink =	generic_readlink,
 	.follow_link =	jffs2_follow_link,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index aa9132d0492..f7f8eff19b7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -88,7 +88,7 @@ static int jfs_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-struct inode_operations jfs_file_inode_operations = {
+const struct inode_operations jfs_file_inode_operations = {
 	.truncate	= jfs_truncate,
 	.setxattr	= jfs_setxattr,
 	.getxattr	= jfs_getxattr,
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 0d06ccfaff0..6802837f757 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -35,10 +35,10 @@ extern void jfs_set_inode_flags(struct inode *);
 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 extern const struct address_space_operations jfs_aops;
-extern struct inode_operations jfs_dir_inode_operations;
+extern const struct inode_operations jfs_dir_inode_operations;
 extern const struct file_operations jfs_dir_operations;
-extern struct inode_operations jfs_file_inode_operations;
+extern const struct inode_operations jfs_file_inode_operations;
 extern const struct file_operations jfs_file_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
+extern const struct inode_operations jfs_symlink_inode_operations;
 extern struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 7ab47561b68..41c20477126 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1503,7 +1503,7 @@ struct dentry *jfs_get_parent(struct dentry *dentry)
 	return parent;
 }
 
-struct inode_operations jfs_dir_inode_operations = {
+const struct inode_operations jfs_dir_inode_operations = {
 	.create		= jfs_create,
 	.lookup		= jfs_lookup,
 	.link		= jfs_link,
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index cee43f36f51..4af1a05aad0 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,7 +29,7 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return NULL;
 }
 
-struct inode_operations jfs_symlink_inode_operations = {
+const struct inode_operations jfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= jfs_follow_link,
 	.setxattr	= jfs_setxattr,
diff --git a/fs/libfs.c b/fs/libfs.c
index 503898d5c4a..0f4ee02e0b1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -186,7 +186,7 @@ const struct file_operations simple_dir_operations = {
 	.fsync		= simple_sync_file,
 };
 
-struct inode_operations simple_dir_inode_operations = {
+const struct inode_operations simple_dir_inode_operations = {
 	.lookup		= simple_lookup,
 };
 
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 40eac2e60d2..f92baa1d757 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -26,7 +26,7 @@ const struct file_operations minix_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
-struct inode_operations minix_file_inode_operations = {
+const struct inode_operations minix_file_inode_operations = {
 	.truncate	= minix_truncate,
 	.getattr	= minix_getattr,
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ddfcc14828..e56822bff94 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -369,7 +369,7 @@ static const struct address_space_operations minix_aops = {
 	.bmap = minix_bmap
 };
 
-static struct inode_operations minix_symlink_inode_operations = {
+static const struct inode_operations minix_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e016ee91d93..73ef84f8fb0 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -75,8 +75,8 @@ extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
 extern ino_t minix_inode_by_name(struct dentry*);
 extern int minix_sync_file(struct file *, struct dentry *, int);
 
-extern struct inode_operations minix_file_inode_operations;
-extern struct inode_operations minix_dir_inode_operations;
+extern const struct inode_operations minix_file_inode_operations;
+extern const struct inode_operations minix_dir_inode_operations;
 extern const struct file_operations minix_file_operations;
 extern const struct file_operations minix_dir_operations;
 extern struct dentry_operations minix_dentry_operations;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 299bb66e3bd..f4aa7a93904 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -291,7 +291,7 @@ out:
 /*
  * directories can handle most operations...
  */
-struct inode_operations minix_dir_inode_operations = {
+const struct inode_operations minix_dir_inode_operations = {
 	.create		= minix_create,
 	.lookup		= minix_lookup,
 	.link		= minix_link,
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 452461955cb..30f7d0ae221 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -646,7 +646,7 @@ out:
 	return err;
 }
 
-static struct inode_operations msdos_dir_inode_operations = {
+static const struct inode_operations msdos_dir_inode_operations = {
 	.create		= msdos_create,
 	.lookup		= msdos_lookup,
 	.unlink		= msdos_unlink,
diff --git a/fs/namei.c b/fs/namei.c
index e4f108f0823..161e2225c75 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2744,7 +2744,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
 			mapping_gfp_mask(inode->i_mapping));
 }
 
-struct inode_operations page_symlink_inode_operations = {
+const struct inode_operations page_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 73747772c3b..011ef0b6d2d 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -58,7 +58,7 @@ const struct file_operations ncp_dir_operations =
 #endif
 };
 
-struct inode_operations ncp_dir_inode_operations =
+const struct inode_operations ncp_dir_inode_operations =
 {
 	.create		= ncp_create,
 	.lookup		= ncp_lookup,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index b91fea03b1c..6b1f6d27099 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -297,7 +297,7 @@ const struct file_operations ncp_file_operations =
 	.fsync		= ncp_fsync,
 };
 
-struct inode_operations ncp_file_inode_operations =
+const struct inode_operations ncp_file_inode_operations =
 {
 	.setattr	= ncp_notify_change,
 };
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 67a90bf795d..730433f3d23 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -229,7 +229,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
 }
 
 #if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
-static struct inode_operations ncp_symlink_inode_operations = {
+static const struct inode_operations ncp_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9ba8cb0ee7..f03a770bacb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -65,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
 	.fsync		= nfs_fsync_dir,
 };
 
-struct inode_operations nfs_dir_inode_operations = {
+const struct inode_operations nfs_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
 	.link		= nfs_link,
@@ -81,7 +81,7 @@ struct inode_operations nfs_dir_inode_operations = {
 };
 
 #ifdef CONFIG_NFS_V3
-struct inode_operations nfs3_dir_inode_operations = {
+const struct inode_operations nfs3_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
 	.link		= nfs_link,
@@ -104,7 +104,7 @@ struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-struct inode_operations nfs4_dir_inode_operations = {
+const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_atomic_lookup,
 	.link		= nfs_link,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9e4a2b70995..8e66b5a2d49 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -68,14 +68,14 @@ const struct file_operations nfs_file_operations = {
 	.check_flags	= nfs_check_flags,
 };
 
-struct inode_operations nfs_file_inode_operations = {
+const struct inode_operations nfs_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 };
 
 #ifdef CONFIG_NFS_V3
-struct inode_operations nfs3_file_inode_operations = {
+const struct inode_operations nfs3_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 371b804e7cc..7f86e65182e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -155,12 +155,12 @@ out_follow:
 	goto out;
 }
 
-struct inode_operations nfs_mountpoint_inode_operations = {
+const struct inode_operations nfs_mountpoint_inode_operations = {
 	.follow_link	= nfs_follow_mountpoint,
 	.getattr	= nfs_getattr,
 };
 
-struct inode_operations nfs_referral_inode_operations = {
+const struct inode_operations nfs_referral_inode_operations = {
 	.follow_link	= nfs_follow_mountpoint,
 };
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c26cd978c7c..e2341766c4f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -151,7 +151,7 @@ struct nfs4_state_recovery_ops {
 };
 
 extern struct dentry_operations nfs4_dentry_operations;
-extern struct inode_operations nfs4_dir_inode_operations;
+extern const struct inode_operations nfs4_dir_inode_operations;
 
 /* inode.c */
 extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b3fd29baadc..1daee65b517 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3625,7 +3625,7 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
 	.recover_lock	= nfs4_lock_expired,
 };
 
-static struct inode_operations nfs4_file_inode_operations = {
+static const struct inode_operations nfs4_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 525c136c7d8..f4a0548b9ce 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -78,7 +78,7 @@ read_failed:
 /*
  * symlinks can't do much...
  */
-struct inode_operations nfs_symlink_inode_operations = {
+const struct inode_operations nfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= nfs_follow_link,
 	.put_link	= page_put_link,
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 076c9420c25..d69c4595ccd 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2328,7 +2328,7 @@ const struct file_operations ntfs_file_ops = {
 						    the data source. */
 };
 
-struct inode_operations ntfs_file_inode_ops = {
+const struct inode_operations ntfs_file_inode_ops = {
 #ifdef NTFS_RW
 	.truncate	= ntfs_truncate_vfs,
 	.setattr	= ntfs_setattr,
@@ -2337,4 +2337,4 @@ struct inode_operations ntfs_file_inode_ops = {
 
 const struct file_operations ntfs_empty_file_ops = {};
 
-struct inode_operations ntfs_empty_inode_ops = {};
+const struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index eddb2247cec..bff01a54675 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -359,7 +359,7 @@ err_out:
 /**
  * Inode operations for directories.
  */
-struct inode_operations ntfs_dir_inode_ops = {
+const struct inode_operations ntfs_dir_inode_ops = {
 	.lookup	= ntfs_lookup,	/* VFS: Lookup directory. */
 };
 
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index a12847ae467..d73f5a9ac34 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -61,13 +61,13 @@ extern const struct address_space_operations ntfs_aops;
 extern const struct address_space_operations ntfs_mst_aops;
 
 extern const struct  file_operations ntfs_file_ops;
-extern struct inode_operations ntfs_file_inode_ops;
+extern const struct inode_operations ntfs_file_inode_ops;
 
 extern const struct  file_operations ntfs_dir_ops;
-extern struct inode_operations ntfs_dir_inode_ops;
+extern const struct inode_operations ntfs_dir_inode_ops;
 
 extern const struct  file_operations ntfs_empty_file_ops;
-extern struct inode_operations ntfs_empty_inode_ops;
+extern const struct inode_operations ntfs_empty_inode_ops;
 
 extern struct export_operations ntfs_export_ops;
 
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 30826242bf5..84b33ffb42f 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -63,9 +63,9 @@
 
 static struct super_operations dlmfs_ops;
 static const struct file_operations dlmfs_file_operations;
-static struct inode_operations dlmfs_dir_inode_operations;
-static struct inode_operations dlmfs_root_inode_operations;
-static struct inode_operations dlmfs_file_inode_operations;
+static const struct inode_operations dlmfs_dir_inode_operations;
+static const struct inode_operations dlmfs_root_inode_operations;
+static const struct inode_operations dlmfs_file_inode_operations;
 static struct kmem_cache *dlmfs_inode_cache;
 
 struct workqueue_struct *user_dlm_worker;
@@ -547,14 +547,14 @@ static const struct file_operations dlmfs_file_operations = {
 	.write		= dlmfs_file_write,
 };
 
-static struct inode_operations dlmfs_dir_inode_operations = {
+static const struct inode_operations dlmfs_dir_inode_operations = {
 	.create		= dlmfs_create,
 	.lookup		= simple_lookup,
 	.unlink		= dlmfs_unlink,
 };
 
 /* this way we can restrict mkdir to only the toplevel of the fs. */
-static struct inode_operations dlmfs_root_inode_operations = {
+static const struct inode_operations dlmfs_root_inode_operations = {
 	.lookup		= simple_lookup,
 	.mkdir		= dlmfs_mkdir,
 	.rmdir		= simple_rmdir,
@@ -568,7 +568,7 @@ static struct super_operations dlmfs_ops = {
 	.drop_inode	= generic_delete_inode,
 };
 
-static struct inode_operations dlmfs_file_inode_operations = {
+static const struct inode_operations dlmfs_file_inode_operations = {
 	.getattr	= simple_getattr,
 };
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 10953a508f2..f2cd3bf9efb 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1365,13 +1365,13 @@ bail:
 	return ret;
 }
 
-struct inode_operations ocfs2_file_iops = {
+const struct inode_operations ocfs2_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
 	.permission	= ocfs2_permission,
 };
 
-struct inode_operations ocfs2_special_file_iops = {
+const struct inode_operations ocfs2_special_file_iops = {
 	.setattr	= ocfs2_setattr,
 	.getattr	= ocfs2_getattr,
 	.permission	= ocfs2_permission,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 601a453f18a..cc973f01f6c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,8 +28,8 @@
 
 extern const struct file_operations ocfs2_fops;
 extern const struct file_operations ocfs2_dops;
-extern struct inode_operations ocfs2_file_iops;
-extern struct inode_operations ocfs2_special_file_iops;
+extern const struct inode_operations ocfs2_file_iops;
+extern const struct inode_operations ocfs2_special_file_iops;
 struct ocfs2_alloc_context;
 
 enum ocfs2_alloc_restarted {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f3d7803b4b4..f7fa52bb3f6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2306,7 +2306,7 @@ leave:
 	return status;
 }
 
-struct inode_operations ocfs2_dir_iops = {
+const struct inode_operations ocfs2_dir_iops = {
 	.create		= ocfs2_create,
 	.lookup		= ocfs2_lookup,
 	.link		= ocfs2_link,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 8425944fccc..0975c7b7212 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -26,7 +26,7 @@
 #ifndef OCFS2_NAMEI_H
 #define OCFS2_NAMEI_H
 
-extern struct inode_operations ocfs2_dir_iops;
+extern const struct inode_operations ocfs2_dir_iops;
 
 struct dentry *ocfs2_get_parent(struct dentry *child);
 
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 03b0191534d..40dc1a51f4a 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -170,12 +170,12 @@ bail:
 	return ERR_PTR(status);
 }
 
-struct inode_operations ocfs2_symlink_inode_operations = {
+const struct inode_operations ocfs2_symlink_inode_operations = {
 	.readlink	= page_readlink,
 	.follow_link	= ocfs2_follow_link,
 	.getattr	= ocfs2_getattr,
 };
-struct inode_operations ocfs2_fast_symlink_inode_operations = {
+const struct inode_operations ocfs2_fast_symlink_inode_operations = {
 	.readlink	= ocfs2_readlink,
 	.follow_link	= ocfs2_follow_link,
 	.getattr	= ocfs2_getattr,
diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h
index 1ea9e4d9e9e..65a6c9c6ad5 100644
--- a/fs/ocfs2/symlink.h
+++ b/fs/ocfs2/symlink.h
@@ -26,8 +26,8 @@
 #ifndef OCFS2_SYMLINK_H
 #define OCFS2_SYMLINK_H
 
-extern struct inode_operations ocfs2_symlink_inode_operations;
-extern struct inode_operations ocfs2_fast_symlink_inode_operations;
+extern const struct inode_operations ocfs2_symlink_inode_operations;
+extern const struct inode_operations ocfs2_fast_symlink_inode_operations;
 
 /*
  * Test whether an inode is a fast symlink.
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 99c0bc37ba0..327807b86fa 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -169,7 +169,7 @@ static const struct file_operations openprom_operations = {
 
 static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
 
-static struct inode_operations openprom_inode_operations = {
+static const struct inode_operations openprom_inode_operations = {
 	.lookup		= openpromfs_lookup,
 };
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index fafdef357e9..7a8ce610fda 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -84,7 +84,7 @@ struct mqueue_inode_info {
 	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
 };
 
-static struct inode_operations mqueue_dir_inode_operations;
+static const struct inode_operations mqueue_dir_inode_operations;
 static const struct file_operations mqueue_file_operations;
 static struct super_operations mqueue_super_ops;
 static void remove_notification(struct mqueue_inode_info *info);
@@ -1160,7 +1160,7 @@ out:
 	return ret;
 }
 
-static struct inode_operations mqueue_dir_inode_operations = {
+static const struct inode_operations mqueue_dir_inode_operations = {
 	.lookup = simple_lookup,
 	.create = mqueue_create,
 	.unlink = mqueue_unlink,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 232aed2b10f..f382b0f775e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1540,7 +1540,7 @@ static const struct file_operations cpuset_file_operations = {
 	.release = cpuset_file_release,
 };
 
-static struct inode_operations cpuset_dir_inode_operations = {
+static const struct inode_operations cpuset_dir_inode_operations = {
 	.lookup = simple_lookup,
 	.mkdir = cpuset_mkdir,
 	.rmdir = cpuset_rmdir,
diff --git a/mm/shmem.c b/mm/shmem.c
index b38e1716927..882053031aa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -178,9 +178,9 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 static struct super_operations shmem_ops;
 static const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
-static struct inode_operations shmem_inode_operations;
-static struct inode_operations shmem_dir_inode_operations;
-static struct inode_operations shmem_special_inode_operations;
+static const struct inode_operations shmem_inode_operations;
+static const struct inode_operations shmem_dir_inode_operations;
+static const struct inode_operations shmem_special_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
@@ -1410,8 +1410,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 }
 
 #ifdef CONFIG_TMPFS
-static struct inode_operations shmem_symlink_inode_operations;
-static struct inode_operations shmem_symlink_inline_operations;
+static const struct inode_operations shmem_symlink_inode_operations;
+static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
  * Normally tmpfs makes no use of shmem_prepare_write, but it
@@ -1904,12 +1904,12 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
 	}
 }
 
-static struct inode_operations shmem_symlink_inline_operations = {
+static const struct inode_operations shmem_symlink_inline_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link_inline,
 };
 
-static struct inode_operations shmem_symlink_inode_operations = {
+static const struct inode_operations shmem_symlink_inode_operations = {
 	.truncate	= shmem_truncate,
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
@@ -2335,7 +2335,7 @@ static const struct file_operations shmem_file_operations = {
 #endif
 };
 
-static struct inode_operations shmem_inode_operations = {
+static const struct inode_operations shmem_inode_operations = {
 	.truncate	= shmem_truncate,
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
@@ -2349,7 +2349,7 @@ static struct inode_operations shmem_inode_operations = {
 
 };
 
-static struct inode_operations shmem_dir_inode_operations = {
+static const struct inode_operations shmem_dir_inode_operations = {
 #ifdef CONFIG_TMPFS
 	.create		= shmem_create,
 	.lookup		= simple_lookup,
@@ -2371,7 +2371,7 @@ static struct inode_operations shmem_dir_inode_operations = {
 #endif
 };
 
-static struct inode_operations shmem_special_inode_operations = {
+static const struct inode_operations shmem_special_inode_operations = {
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	.setattr	= shmem_notify_change,
 	.setxattr	= generic_setxattr,
-- 
cgit v1.2.3-70-g09d2