From 96192ff1a9d0c6ef365d21667080259d83ea2f5b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Jun 2005 21:15:16 -0700 Subject: [PATCH] devfs: Remove the miscdevice devfs_name field as it's no longer needed Also fixes all drivers that set this field. Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/microcode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 0a865889b2a..40b44cc0d14 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -493,7 +493,6 @@ static struct file_operations microcode_fops = { static struct miscdevice microcode_dev = { .minor = MICROCODE_MINOR, .name = "microcode", - .devfs_name = "cpu/microcode", .fops = µcode_fops, }; -- cgit v1.2.3-70-g09d2 From 685143ac1f7a579a3fac9c7f2ac8f82e95af6864 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 12 Jun 2006 15:18:31 -0700 Subject: [PATCH] 64bit resource: fix up printks for resources in arch and core code This is needed if we wish to change the size of the resource structures. Based on an original patch from Vivek Goyal and Andrew Morton. (tweaked by Andy Isaacson ) Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Andy Isaacson Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/efi.c | 6 +++-- arch/powerpc/kernel/pci_32.c | 37 +++++++++++++++--------------- arch/powerpc/platforms/83xx/pci.c | 5 +++-- arch/powerpc/platforms/85xx/pci.c | 5 +++-- arch/powerpc/platforms/chrp/pci.c | 4 ++-- arch/powerpc/platforms/maple/pci.c | 5 +++-- arch/powerpc/platforms/powermac/pci.c | 5 +++-- arch/ppc/kernel/pci.c | 42 +++++++++++++++++++++-------------- arch/sparc/kernel/ioport.c | 4 +++- kernel/resource.c | 10 +++++---- 10 files changed, 71 insertions(+), 52 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 9202b67c4b2..8beb0f07d99 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -601,8 +601,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res) < 0) - printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", - res->name, res->start, res->end); + printk(KERN_ERR PFX "Failed to allocate res %s : " + "0x%llx-0x%llx\n", res->name, + (unsigned long long)res->start, + (unsigned long long)res->end); /* * We don't know which region contains kernel data so we try * it repeatedly and let the resource manager test it. diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index b5431ccf114..d9e2506db5f 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -99,7 +99,7 @@ pcibios_fixup_resources(struct pci_dev *dev) if (!res->flags) continue; if (res->end == 0xffffffff) { - DBG("PCI:%s Resource %d [%08lx-%08lx] is unassigned\n", + DBG("PCI:%s Resource %d [%016llx-%016llx] is unassigned\n", pci_name(dev), i, res->start, res->end); res->end -= res->start; res->start = 0; @@ -117,7 +117,7 @@ pcibios_fixup_resources(struct pci_dev *dev) res->start += offset; res->end += offset; #ifdef DEBUG - printk("Fixup res %d (%lx) of dev %s: %lx -> %lx\n", + printk("Fixup res %d (%lx) of dev %s: %llx -> %llx\n", i, res->flags, pci_name(dev), res->start - offset, res->start); #endif @@ -183,7 +183,7 @@ void pcibios_align_resource(void *data, struct resource *res, unsigned long size if (size > 0x100) { printk(KERN_ERR "PCI: I/O Region %s/%d too large" - " (%ld bytes)\n", pci_name(dev), + " (%lld bytes)\n", pci_name(dev), dev->resource - res, size); } @@ -255,8 +255,8 @@ pcibios_allocate_bus_resources(struct list_head *bus_list) } } - DBG("PCI: bridge rsrc %lx..%lx (%lx), parent %p\n", - res->start, res->end, res->flags, pr); + DBG("PCI: bridge rsrc %llx..%llx (%lx), parent %p\n", + res->start, res->end, res->flags, pr); if (pr) { if (request_resource(pr, res) == 0) continue; @@ -306,7 +306,7 @@ reparent_resources(struct resource *parent, struct resource *res) *pp = NULL; for (p = res->child; p != NULL; p = p->sibling) { p->parent = res; - DBG(KERN_INFO "PCI: reparented %s [%lx..%lx] under %s\n", + DBG(KERN_INFO "PCI: reparented %s [%llx..%llx] under %s\n", p->name, p->start, p->end, res->name); } return 0; @@ -362,13 +362,14 @@ pci_relocate_bridge_resource(struct pci_bus *bus, int i) try = conflict->start - 1; } if (request_resource(pr, res)) { - DBG(KERN_ERR "PCI: huh? couldn't move to %lx..%lx\n", + DBG(KERN_ERR "PCI: huh? couldn't move to %llx..%llx\n", res->start, res->end); return -1; /* "can't happen" */ } update_bridge_base(bus, i); - printk(KERN_INFO "PCI: bridge %d resource %d moved to %lx..%lx\n", - bus->number, i, res->start, res->end); + printk(KERN_INFO "PCI: bridge %d resource %d moved to %llx..%llx\n", + bus->number, i, (unsigned long long)res->start, + (unsigned long long)res->end); return 0; } @@ -479,14 +480,14 @@ static inline void alloc_resource(struct pci_dev *dev, int idx) { struct resource *pr, *r = &dev->resource[idx]; - DBG("PCI:%s: Resource %d: %08lx-%08lx (f=%lx)\n", + DBG("PCI:%s: Resource %d: %016llx-%016llx (f=%lx)\n", pci_name(dev), idx, r->start, r->end, r->flags); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { printk(KERN_ERR "PCI: Cannot allocate resource region %d" " of device %s\n", idx, pci_name(dev)); if (pr) - DBG("PCI: parent is %p: %08lx-%08lx (f=%lx)\n", + DBG("PCI: parent is %p: %016llx-%016llx (f=%lx)\n", pr, pr->start, pr->end, pr->flags); /* We'll assign a new address later */ r->flags |= IORESOURCE_UNSET; @@ -956,7 +957,7 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose, res = &hose->io_resource; res->flags = IORESOURCE_IO; res->start = ranges[2]; - DBG("PCI: IO 0x%lx -> 0x%lx\n", + DBG("PCI: IO 0x%llx -> 0x%llx\n", res->start, res->start + size - 1); break; case 2: /* memory space */ @@ -978,7 +979,7 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose, if(ranges[0] & 0x40000000) res->flags |= IORESOURCE_PREFETCH; res->start = ranges[na+2]; - DBG("PCI: MEM[%d] 0x%lx -> 0x%lx\n", memno, + DBG("PCI: MEM[%d] 0x%llx -> 0x%llx\n", memno, res->start, res->start + size - 1); } break; @@ -1074,7 +1075,7 @@ do_update_p2p_io_resource(struct pci_bus *bus, int enable_vga) DBG("Remapping Bus %d, bridge: %s\n", bus->number, pci_name(bridge)); res.start -= ((unsigned long) hose->io_base_virt - isa_io_base); res.end -= ((unsigned long) hose->io_base_virt - isa_io_base); - DBG(" IO window: %08lx-%08lx\n", res.start, res.end); + DBG(" IO window: %016llx-%016llx\n", res.start, res.end); /* Set up the top and bottom of the PCI I/O segment for this bus. */ pci_read_config_dword(bridge, PCI_IO_BASE, &l); @@ -1223,8 +1224,8 @@ do_fixup_p2p_level(struct pci_bus *bus) continue; if ((r->flags & IORESOURCE_IO) == 0) continue; - DBG("Trying to allocate from %08lx, size %08lx from parent" - " res %d: %08lx -> %08lx\n", + DBG("Trying to allocate from %016llx, size %016llx from parent" + " res %d: %016llx -> %016llx\n", res->start, res->end, i, r->start, r->end); if (allocate_resource(r, res, res->end + 1, res->start, max, @@ -1574,8 +1575,8 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, else prot |= _PAGE_GUARDED; - printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, - prot); + printk("PCI map for %s:%llx, prot: %lx\n", pci_name(dev), + (unsigned long long)rp->start, prot); return __pgprot(prot); } diff --git a/arch/powerpc/platforms/83xx/pci.c b/arch/powerpc/platforms/83xx/pci.c index 16f7d3b30e1..3baceb00fef 100644 --- a/arch/powerpc/platforms/83xx/pci.c +++ b/arch/powerpc/platforms/83xx/pci.c @@ -91,9 +91,10 @@ int __init add_bridge(struct device_node *dev) mpc83xx_pci2_busno = hose->first_busno; } - printk(KERN_INFO "Found MPC83xx PCI host bridge at 0x%08lx. " + printk(KERN_INFO "Found MPC83xx PCI host bridge at 0x%016llx. " "Firmware bus number: %d->%d\n", - rsrc.start, hose->first_busno, hose->last_busno); + (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", hose, hose->cfg_addr, hose->cfg_data); diff --git a/arch/powerpc/platforms/85xx/pci.c b/arch/powerpc/platforms/85xx/pci.c index bad290110ed..48c8849c07c 100644 --- a/arch/powerpc/platforms/85xx/pci.c +++ b/arch/powerpc/platforms/85xx/pci.c @@ -79,9 +79,10 @@ int __init add_bridge(struct device_node *dev) mpc85xx_pci2_busno = hose->first_busno; } - printk(KERN_INFO "Found MPC85xx PCI host bridge at 0x%08lx. " + printk(KERN_INFO "Found MPC85xx PCI host bridge at 0x%016llx. " "Firmware bus number: %d->%d\n", - rsrc.start, hose->first_busno, hose->last_busno); + (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", hose, hose->cfg_addr, hose->cfg_data); diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index ac224876ce5..53515daf01b 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -143,7 +143,7 @@ hydra_init(void) if (np == NULL || of_address_to_resource(np, 0, &r)) return 0; Hydra = ioremap(r.start, r.end-r.start); - printk("Hydra Mac I/O at %lx\n", r.start); + printk("Hydra Mac I/O at %llx\n", (unsigned long long)r.start); printk("Hydra Feature_Control was %x", in_le32(&Hydra->Feature_Control)); out_le32(&Hydra->Feature_Control, (HYDRA_FC_SCC_CELL_EN | @@ -267,7 +267,7 @@ chrp_find_bridges(void) bus_range[0], bus_range[1]); printk(" controlled by %s", dev->type); if (!is_longtrail) - printk(" at %lx", r.start); + printk(" at %llx", (unsigned long long)r.start); printk("\n"); hose = pcibios_alloc_controller(); diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 9a4efc0c3b2..f7170ff86da 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -376,9 +376,10 @@ static void __init maple_fixup_phb_resources(void) unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; - printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n", hose->global_number, - hose->io_resource.start, hose->io_resource.end); + (unsigned long long)hose->io_resource.start, + (unsigned long long)hose->io_resource.end); } } diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 80035853467..d524a915aa8 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -939,9 +939,10 @@ static int __init add_bridge(struct device_node *dev) disp_name = "Chaos"; primary = 0; } - printk(KERN_INFO "Found %s PCI host bridge at 0x%08lx. " + printk(KERN_INFO "Found %s PCI host bridge at 0x%016llx. " "Firmware bus number: %d->%d\n", - disp_name, rsrc.start, hose->first_busno, hose->last_busno); + disp_name, (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); #endif /* CONFIG_PPC32 */ DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index d20accf9650..8544e100d71 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -95,8 +95,10 @@ pcibios_fixup_resources(struct pci_dev *dev) if (!res->flags) continue; if (res->end == 0xffffffff) { - DBG("PCI:%s Resource %d [%08lx-%08lx] is unassigned\n", - pci_name(dev), i, res->start, res->end); + DBG("PCI:%s Resource %d [%016llx-%016llx] is unassigned\n", + pci_name(dev), i, + (unsigned long long)res->start, + (unsigned long long)res->end); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; @@ -179,8 +181,8 @@ void pcibios_align_resource(void *data, struct resource *res, unsigned long size if (size > 0x100) { printk(KERN_ERR "PCI: I/O Region %s/%d too large" - " (%ld bytes)\n", pci_name(dev), - dev->resource - res, size); + " (%lld bytes)\n", pci_name(dev), + dev->resource - res, (unsigned long long)size); } if (start & 0x300) { @@ -251,8 +253,9 @@ pcibios_allocate_bus_resources(struct list_head *bus_list) } } - DBG("PCI: bridge rsrc %lx..%lx (%lx), parent %p\n", - res->start, res->end, res->flags, pr); + DBG("PCI: bridge rsrc %llx..%llx (%lx), parent %p\n", + (unsigned long long)res->start, + (unsigned long long)res->end, res->flags, pr); if (pr) { if (request_resource(pr, res) == 0) continue; @@ -302,8 +305,9 @@ reparent_resources(struct resource *parent, struct resource *res) *pp = NULL; for (p = res->child; p != NULL; p = p->sibling) { p->parent = res; - DBG(KERN_INFO "PCI: reparented %s [%lx..%lx] under %s\n", - p->name, p->start, p->end, res->name); + DBG(KERN_INFO "PCI: reparented %s [%llx..%llx] under %s\n", + p->name, (unsigned long long)p->start, + (unsigned long long)p->end, res->name); } return 0; } @@ -358,13 +362,15 @@ pci_relocate_bridge_resource(struct pci_bus *bus, int i) try = conflict->start - 1; } if (request_resource(pr, res)) { - DBG(KERN_ERR "PCI: huh? couldn't move to %lx..%lx\n", - res->start, res->end); + DBG(KERN_ERR "PCI: huh? couldn't move to %llx..%llx\n", + (unsigned long long)res->start, + (unsigned long long)res->end); return -1; /* "can't happen" */ } update_bridge_base(bus, i); - printk(KERN_INFO "PCI: bridge %d resource %d moved to %lx..%lx\n", - bus->number, i, res->start, res->end); + printk(KERN_INFO "PCI: bridge %d resource %d moved to %llx..%llx\n", + bus->number, i, (unsigned long long)res->start, + (unsigned long long)res->end); return 0; } @@ -475,15 +481,17 @@ static inline void alloc_resource(struct pci_dev *dev, int idx) { struct resource *pr, *r = &dev->resource[idx]; - DBG("PCI:%s: Resource %d: %08lx-%08lx (f=%lx)\n", - pci_name(dev), idx, r->start, r->end, r->flags); + DBG("PCI:%s: Resource %d: %016llx-%016llx (f=%lx)\n", + pci_name(dev), idx, (unsigned long long)r->start, + (unsigned long long)r->end, r->flags); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { printk(KERN_ERR "PCI: Cannot allocate resource region %d" " of device %s\n", idx, pci_name(dev)); if (pr) - DBG("PCI: parent is %p: %08lx-%08lx (f=%lx)\n", - pr, pr->start, pr->end, pr->flags); + DBG("PCI: parent is %p: %016llx-%016llx (f=%lx)\n", + pr, (unsigned long long)pr->start, + (unsigned long long)pr->end, pr->flags); /* We'll assign a new address later */ r->flags |= IORESOURCE_UNSET; r->end -= r->start; @@ -952,7 +960,7 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, else prot |= _PAGE_GUARDED; - printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, + printk("PCI map for %s:%llx, prot: %llx\n", pci_name(dev), rp->start, prot); return __pgprot(prot); diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index ae4c667c906..99d716b9507 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -819,7 +819,9 @@ _sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof, if (p + 32 >= e) /* Better than nothing */ break; if ((nm = r->name) == 0) nm = "???"; - p += sprintf(p, "%08lx-%08lx: %s\n", r->start, r->end, nm); + p += sprintf(p, "%016llx-%016llx: %s\n", + (unsigned long long)r->start, + (unsigned long long)r->end, nm); } return p-buf; diff --git a/kernel/resource.c b/kernel/resource.c index e3080fcc66a..ea5f7811a40 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -83,10 +83,10 @@ static int r_show(struct seq_file *m, void *v) for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) if (p->parent == root) break; - seq_printf(m, "%*s%0*lx-%0*lx : %s\n", + seq_printf(m, "%*s%0*llx-%0*llx : %s\n", depth * 2, "", - width, r->start, - width, r->end, + width, (unsigned long long) r->start, + width, (unsigned long long) r->end, r->name ? r->name : ""); return 0; } @@ -511,7 +511,9 @@ void __release_region(struct resource *parent, unsigned long start, unsigned lon write_unlock(&resource_lock); - printk(KERN_WARNING "Trying to free nonexistent resource <%08lx-%08lx>\n", start, end); + printk(KERN_WARNING "Trying to free nonexistent resource " + "<%016llx-%016llx>\n", (unsigned long long)start, + (unsigned long long)end); } EXPORT_SYMBOL(__release_region); -- cgit v1.2.3-70-g09d2 From e31dd6e4520439ceae4753f32dd2da2c345e929a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 12 Jun 2006 17:06:02 -0700 Subject: [PATCH] 64bit resource: change pci core and arch code to use resource_size_t Based on a patch series originally from Vivek Goyal Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci.c | 4 ++-- arch/arm/kernel/bios32.c | 6 +++--- arch/cris/arch-v32/drivers/pci/bios.c | 4 ++-- arch/frv/mb93090-mb00/pci-frv.c | 4 ++-- arch/i386/pci/i386.c | 4 ++-- arch/ia64/pci/pci.c | 2 +- arch/m68knommu/kernel/comempci.c | 3 ++- arch/mips/pci/pci.c | 4 ++-- arch/mips/pmc-sierra/yosemite/ht.c | 4 ++-- arch/parisc/kernel/pci.c | 2 +- arch/powerpc/kernel/pci_32.c | 10 +++++----- arch/powerpc/kernel/pci_64.c | 4 ++-- arch/ppc/kernel/pci.c | 12 ++++++------ arch/sh/boards/mpc1211/pci.c | 4 ++-- arch/sh/boards/overdrive/galileo.c | 2 +- arch/sh/drivers/pci/pci.c | 6 +++--- arch/sh64/kernel/pcibios.c | 4 ++-- arch/sparc/kernel/pcic.c | 2 +- arch/sparc64/kernel/pci.c | 2 +- arch/v850/kernel/rte_mb_a_pci.c | 2 +- arch/xtensa/kernel/pci.c | 6 +++--- drivers/pci/bus.c | 10 +++++----- drivers/pci/pci-sysfs.c | 4 ++-- drivers/pci/pci.h | 6 +++--- drivers/pci/proc.c | 4 ++-- drivers/pci/rom.c | 10 +++++----- drivers/pci/setup-res.c | 6 +++--- include/asm-arm/mach/pci.h | 2 +- include/asm-powerpc/pci.h | 2 +- include/asm-ppc/pci.h | 2 +- include/linux/pci.h | 13 +++++++------ 31 files changed, 76 insertions(+), 74 deletions(-) (limited to 'arch/i386') diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 2a8b364c822..4ea6711e55a 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -124,12 +124,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final); void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; struct pci_controller *hose = dev->sysdata; unsigned long alignto; - unsigned long start = res->start; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { /* Make sure we start at our min on all hoses */ diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 302fc140154..45da06fc1ba 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -304,7 +304,7 @@ static inline int pdev_bad_for_parity(struct pci_dev *dev) static void __devinit pdev_fixup_device_resources(struct pci_sys_data *root, struct pci_dev *dev) { - unsigned long offset; + resource_size_t offset; int i; for (i = 0; i < PCI_NUM_RESOURCES; i++) { @@ -634,9 +634,9 @@ char * __init pcibios_setup(char *str) * which might be mirrored at 0x0100-0x03ff.. */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { - unsigned long start = res->start; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO && start & 0x300) start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c index 1e9d062103a..a2b9c60c277 100644 --- a/arch/cris/arch-v32/drivers/pci/bios.c +++ b/arch/cris/arch-v32/drivers/pci/bios.c @@ -43,10 +43,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index 0a26bf6f1cd..4f165c93be4 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -64,10 +64,10 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root, */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c index a151f7a99f5..10154a2cac6 100644 --- a/arch/i386/pci/i386.c +++ b/arch/i386/pci/i386.c @@ -48,10 +48,10 @@ */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 77375a55da3..5bef0e3603f 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -568,7 +568,7 @@ pcibios_disable_device (struct pci_dev *dev) void pcibios_align_resource (void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { } diff --git a/arch/m68knommu/kernel/comempci.c b/arch/m68knommu/kernel/comempci.c index 8670938f110..db7a0c1ceba 100644 --- a/arch/m68knommu/kernel/comempci.c +++ b/arch/m68knommu/kernel/comempci.c @@ -357,7 +357,8 @@ void pcibios_fixup_bus(struct pci_bus *b) /*****************************************************************************/ -void pcibios_align_resource(void *data, struct resource *res, unsigned long size, unsigned long align) +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) { } diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 4dfce154d4a..ba66f8c9bd4 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -51,11 +51,11 @@ unsigned long PCIBIOS_MIN_MEM = 0; */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; struct pci_controller *hose = dev->sysdata; - unsigned long start = res->start; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { /* Make sure we start at our min on all hoses */ diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c index 54b65a80abf..fb523ebcafa 100644 --- a/arch/mips/pmc-sierra/yosemite/ht.c +++ b/arch/mips/pmc-sierra/yosemite/ht.c @@ -383,12 +383,12 @@ void pcibios_update_resource(struct pci_dev *dev, struct resource *root, void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; /* We need to avoid collisions with `mirrored' VGA ports and other strange ISA hardware, so we always want the diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c index 79c7db2705f..7d6967ee367 100644 --- a/arch/parisc/kernel/pci.c +++ b/arch/parisc/kernel/pci.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL(pcibios_bus_to_resource); * than res->start. */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long alignment) + resource_size_t size, resource_size_t alignment) { unsigned long mask, align; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d9e2506db5f..8474355a1a4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -173,18 +173,18 @@ EXPORT_SYMBOL(pcibios_bus_to_resource); * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. */ -void pcibios_align_resource(void *data, struct resource *res, unsigned long size, - unsigned long align) +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (size > 0x100) { printk(KERN_ERR "PCI: I/O Region %s/%d too large" " (%lld bytes)\n", pci_name(dev), - dev->resource - res, size); + dev->resource - res, (unsigned long long)size); } if (start & 0x300) { @@ -1756,7 +1756,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, - u64 *start, u64 *end) + resource_size_t *start, resource_size_t *end) { struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); unsigned long offset = 0; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 247937dd8b7..286aa52aae3 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -138,11 +138,11 @@ EXPORT_SYMBOL(pcibios_bus_to_resource); * which might have be mirrored at 0x0100-0x03ff.. */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; struct pci_controller *hose = pci_bus_to_host(dev->bus); - unsigned long start = res->start; + resource_size_t start = res->start; unsigned long alignto; if (res->flags & IORESOURCE_IO) { diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index 8544e100d71..242bb052be6 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -171,13 +171,13 @@ EXPORT_SYMBOL(pcibios_bus_to_resource); * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. */ -void pcibios_align_resource(void *data, struct resource *res, unsigned long size, - unsigned long align) +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (size > 0x100) { printk(KERN_ERR "PCI: I/O Region %s/%d too large" @@ -960,8 +960,8 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, else prot |= _PAGE_GUARDED; - printk("PCI map for %s:%llx, prot: %llx\n", pci_name(dev), rp->start, - prot); + printk("PCI map for %s:%llx, prot: %lx\n", pci_name(dev), + (unsigned long long)rp->start, prot); return __pgprot(prot); } @@ -1130,7 +1130,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, - u64 *start, u64 *end) + resource_size_t *start, resource_size_t *end) { struct pci_controller *hose = pci_bus_to_hose(dev->bus->number); unsigned long offset = 0; diff --git a/arch/sh/boards/mpc1211/pci.c b/arch/sh/boards/mpc1211/pci.c index ba3a6543975..9f7ccd33ffb 100644 --- a/arch/sh/boards/mpc1211/pci.c +++ b/arch/sh/boards/mpc1211/pci.c @@ -273,9 +273,9 @@ void __init pcibios_fixup_irqs(void) } void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { - unsigned long start = res->start; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { if (start >= 0x10000UL) { diff --git a/arch/sh/boards/overdrive/galileo.c b/arch/sh/boards/overdrive/galileo.c index 276fa11ee4c..b055809d2ac 100644 --- a/arch/sh/boards/overdrive/galileo.c +++ b/arch/sh/boards/overdrive/galileo.c @@ -536,7 +536,7 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) } void pcibios_align_resource(void *data, struct resource *res, - unsigned long size) + resource_size_t size) { } diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index c1669905abe..3d546ba329c 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -75,7 +75,7 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root, } void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) __attribute__ ((weak)); /* @@ -85,10 +85,10 @@ void pcibios_align_resource(void *data, struct resource *res, * modulo 0x400. */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/sh64/kernel/pcibios.c b/arch/sh64/kernel/pcibios.c index 50c61dcb9fa..945920bc24d 100644 --- a/arch/sh64/kernel/pcibios.c +++ b/arch/sh64/kernel/pcibios.c @@ -69,10 +69,10 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root, * modulo 0x400. */ void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (start & 0x300) { start = (start + 0x3ff) & ~0x3ff; diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index bcfdddd0418..5df3ebdc0ab 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -860,7 +860,7 @@ char * __init pcibios_setup(char *str) } void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { } diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 6c9e3e94aba..20ca9ec8fd3 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -357,7 +357,7 @@ void pcibios_update_irq(struct pci_dev *pdev, int irq) } void pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { } diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c index ffbb6d073bf..3a7c5c9c3ac 100644 --- a/arch/v850/kernel/rte_mb_a_pci.c +++ b/arch/v850/kernel/rte_mb_a_pci.c @@ -329,7 +329,7 @@ void pcibios_fixup_bus(struct pci_bus *b) void pcibios_align_resource (void *data, struct resource *res, - unsigned long size, unsigned long align) + resource_size_t size, resource_size_t align) { } diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index c6f471b9eaa..eda029fc897 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -71,13 +71,13 @@ static int pci_bus_count; * which might have be mirrored at 0x0100-0x03ff.. */ void -pcibios_align_resource(void *data, struct resource *res, unsigned long size, - unsigned long align) +pcibios_align_resource(void *data, struct resource *res, resource_size_t size, + resource_size_t align) { struct pci_dev *dev = data; if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; + resource_size_t start = res->start; if (size > 0x100) { printk(KERN_ERR "PCI: I/O Region %s/%d too large" diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 72309268202..5f7db9d2436 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -34,11 +34,11 @@ */ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - unsigned long size, unsigned long align, unsigned long min, - unsigned int type_mask, - void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), - void *alignf_data) + resource_size_t size, resource_size_t align, + resource_size_t min, unsigned int type_mask, + void (*alignf)(void *, struct resource *, resource_size_t, + resource_size_t), + void *alignf_data) { int i, ret = -ENOMEM; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index bc405c035ce..606f9b6f70e 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -87,7 +87,7 @@ resource_show(struct device * dev, struct device_attribute *attr, char * buf) char * str = buf; int i; int max = 7; - u64 start, end; + resource_size_t start, end; if (pci_dev->subordinate) max = DEVICE_COUNT_RESOURCE; @@ -365,7 +365,7 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, struct device, kobj)); struct resource *res = (struct resource *)attr->private; enum pci_mmap_state mmap_type; - u64 start, end; + resource_size_t start, end; int i; for (i = 0; i < PCI_ROM_RESOURCE; i++) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 29bdeca031a..9cc842b666e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -6,10 +6,10 @@ extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); extern int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - unsigned long size, unsigned long align, - unsigned long min, unsigned int type_mask, + resource_size_t size, resource_size_t align, + resource_size_t min, unsigned int type_mask, void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), + resource_size_t, resource_size_t), void *alignf_data); /* Firmware callbacks */ extern int (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 20dfd77bd8c..99cf3337976 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -350,14 +350,14 @@ static int show_device(struct seq_file *m, void *v) dev->irq); /* Here should be 7 and not PCI_NUM_RESOURCES as we need to preserve compatibility */ for (i=0; i<7; i++) { - u64 start, end; + resource_size_t start, end; pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); seq_printf(m, "\t%16llx", (unsigned long long)(start | (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); } for (i=0; i<7; i++) { - u64 start, end; + resource_size_t start, end; pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); seq_printf(m, "\t%16llx", dev->resource[i].start < dev->resource[i].end ? diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 598a115cd00..cbb69cf4131 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -80,8 +80,8 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) } else { if (res->flags & IORESOURCE_ROM_COPY) { *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); - return (void __iomem *)pci_resource_start(pdev, - PCI_ROM_RESOURCE); + return (void __iomem *)(unsigned long) + pci_resource_start(pdev, PCI_ROM_RESOURCE); } else { /* assign the ROM an address if it doesn't have one */ if (res->parent == NULL && @@ -170,11 +170,11 @@ void __iomem *pci_map_rom_copy(struct pci_dev *pdev, size_t *size) return rom; res->end = res->start + *size; - memcpy_fromio((void*)res->start, rom, *size); + memcpy_fromio((void*)(unsigned long)res->start, rom, *size); pci_unmap_rom(pdev, rom); res->flags |= IORESOURCE_ROM_COPY; - return (void __iomem *)res->start; + return (void __iomem *)(unsigned long)res->start; } /** @@ -227,7 +227,7 @@ void pci_cleanup_rom(struct pci_dev *pdev) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; if (res->flags & IORESOURCE_ROM_COPY) { - kfree((void*)res->start); + kfree((void*)(unsigned long)res->start); res->flags &= ~IORESOURCE_ROM_COPY; res->start = 0; res->end = 0; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index f5ff0d3ba62..ab78e4bbdd8 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -121,7 +121,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; struct resource *res = dev->resource + resno; - unsigned long size, min, align; + resource_size_t size, min, align; int ret; size = res->end - res->start + 1; @@ -209,7 +209,7 @@ pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r; struct resource_list *list, *tmp; - unsigned long r_align; + resource_size_t r_align; r = &dev->resource[i]; r_align = r->end - r->start; @@ -225,7 +225,7 @@ pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) } r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start; for (list = head; ; list = list->next) { - unsigned long align = 0; + resource_size_t align = 0; struct resource_list *ln = list->next; int idx; diff --git a/include/asm-arm/mach/pci.h b/include/asm-arm/mach/pci.h index 25d540ed007..923e0ca6620 100644 --- a/include/asm-arm/mach/pci.h +++ b/include/asm-arm/mach/pci.h @@ -28,7 +28,7 @@ struct hw_pci { struct pci_sys_data { struct list_head node; int busnr; /* primary bus number */ - unsigned long mem_offset; /* bus->cpu memory mapping offset */ + u64 mem_offset; /* bus->cpu memory mapping offset */ unsigned long io_offset; /* bus->cpu IO mapping offset */ struct pci_bus *bus; /* PCI bus */ struct resource *resource[3]; /* Primary PCI bus resources */ diff --git a/include/asm-powerpc/pci.h b/include/asm-powerpc/pci.h index 5d2c9e6c4be..46afd29b904 100644 --- a/include/asm-powerpc/pci.h +++ b/include/asm-powerpc/pci.h @@ -242,7 +242,7 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, #define HAVE_ARCH_PCI_RESOURCE_TO_USER extern void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, - u64 *start, u64 *end); + resource_size_t *start, resource_size_t *end); #endif /* CONFIG_PPC_MULTIPLATFORM || CONFIG_PPC32 */ #endif /* __KERNEL__ */ diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h index 61434edbad7..11ffaaa5da1 100644 --- a/include/asm-ppc/pci.h +++ b/include/asm-ppc/pci.h @@ -133,7 +133,7 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, #define HAVE_ARCH_PCI_RESOURCE_TO_USER extern void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, - u64 *start, u64 *end); + resource_size_t *start, resource_size_t *end); #endif /* __KERNEL__ */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 62a8c22f5f6..983fca251b2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -404,8 +404,8 @@ int pcibios_enable_device(struct pci_dev *, int mask); char *pcibios_setup (char *str); /* Used only when drivers/pci/setup.c is used */ -void pcibios_align_resource(void *, struct resource *, - unsigned long, unsigned long); +void pcibios_align_resource(void *, struct resource *, resource_size_t, + resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ @@ -532,10 +532,10 @@ void pci_release_region(struct pci_dev *, int); /* drivers/pci/bus.c */ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - unsigned long size, unsigned long align, - unsigned long min, unsigned int type_mask, + resource_size_t size, resource_size_t align, + resource_size_t min, unsigned int type_mask, void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), + resource_size_t, resource_size_t), void *alignf_data); void pci_enable_bridges(struct pci_bus *bus); @@ -730,7 +730,8 @@ static inline char *pci_name(struct pci_dev *pdev) */ #ifndef HAVE_ARCH_PCI_RESOURCE_TO_USER static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, u64 *start, u64 *end) + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) { *start = rsrc->start; *end = rsrc->end; -- cgit v1.2.3-70-g09d2 From 6550e07f41ce8473ed684dac54fbfbd42183ffda Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 12 Jun 2006 17:11:31 -0700 Subject: [PATCH] 64bit Resource: finally enable 64bit resource sizes Introduce the Kconfig entry and actually switch to a 64bit value, if wanted, for resource_size_t. Based on a patch series originally from Vivek Goyal Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/i386/Kconfig | 1 + include/linux/types.h | 7 ++++++- kernel/resource.c | 8 +++----- mm/Kconfig | 6 ++++++ 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 47c08bcd9b2..7e46ad7a7b6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -529,6 +529,7 @@ config X86_PAE bool depends on HIGHMEM64G default y + select RESOURCES_64BIT # Common NUMA Features config NUMA diff --git a/include/linux/types.h b/include/linux/types.h index a021e157733..3f235660a3c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -178,9 +178,14 @@ typedef __u64 __bitwise __be64; #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; -typedef unsigned long resource_size_t; +#ifdef CONFIG_RESOURCES_64BIT +typedef u64 resource_size_t; +#else +typedef u32 resource_size_t; #endif +#endif /* __KERNEL__ */ + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; diff --git a/kernel/resource.c b/kernel/resource.c index 54835c02ab3..cc73029088a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -23,20 +23,18 @@ struct resource ioport_resource = { .name = "PCI IO", - .start = 0x0000, + .start = 0, .end = IO_SPACE_LIMIT, .flags = IORESOURCE_IO, }; - EXPORT_SYMBOL(ioport_resource); struct resource iomem_resource = { .name = "PCI mem", - .start = 0UL, - .end = ~0UL, + .start = 0, + .end = -1, .flags = IORESOURCE_MEM, }; - EXPORT_SYMBOL(iomem_resource); static DEFINE_RWLOCK(resource_lock); diff --git a/mm/Kconfig b/mm/Kconfig index 66e65ab3942..e3644b0062b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -145,3 +145,9 @@ config MIGRATION while the virtual addresses are not changed. This is useful for example on NUMA systems to put pages nearer to the processors accessing the page. + +config RESOURCES_64BIT + bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL) + default 64BIT + help + This option allows memory and IO resources to be 64 bit. -- cgit v1.2.3-70-g09d2 From 87937472ff8e34ad5c7b798a8a52e4368af216df Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 1 Jun 2006 20:39:02 -0700 Subject: [PATCH] i386: export memory more than 4G through /proc/iomem Currently /proc/iomem exports physical memory also apart from io device memory. But on i386, it truncates any memory more than 4GB. This leads to problems for kexec/kdump. Kexec reads /proc/iomem to determine the system memory layout and prepares a memory map based on that and passes it to the kernel being kexeced. Given the fact that memory more than 4GB has been truncated, new kernel never gets to see and use that memory. Kdump also reads /proc/iomem to determine the physical memory layout of the system and encodes this informaiton in ELF headers. After a crash new kernel parses these ELF headers being used by previous kernel and vmcore is prepared accordingly. As memory more than 4GB has been truncated, kdump never sees that memory and never prepares ELF headers for it. Hence vmcore is truncated and limited to 4GB even if there is more physical memory in the system. This patch exports memory more than 4GB through /proc/iomem on i386. Cc: Maneesh Soni Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 4a65040cc62..6712f0d2eb3 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1314,8 +1314,10 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; +#ifndef CONFIG_RESOURCES_64BIT if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; +#endif res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; -- cgit v1.2.3-70-g09d2 From bc02af93dd2bbddce1b55e0a493f833a1b7cf140 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:30 -0700 Subject: [PATCH] pgdat allocation for new node add (specify node id) Change the name of old add_memory() to arch_add_memory. And use node id to get pgdat for the node at NODE_DATA(). Note: Powerpc's old add_memory() is defined as __devinit. However, add_memory() is usually called only after bootup. I suppose it may be redundant. But, I'm not well known about powerpc. So, I keep it. (But, __meminit is better at least.) Signed-off-by: Yasunori Goto Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/init.c | 2 +- arch/ia64/mm/init.c | 5 ++-- arch/powerpc/mm/mem.c | 11 +++++-- arch/x86_64/mm/init.c | 66 ++++++++++++++++++++++++------------------ drivers/acpi/Kconfig | 2 +- drivers/acpi/acpi_memhotplug.c | 3 +- drivers/base/memory.c | 4 ++- include/linux/memory_hotplug.h | 13 ++++++++- mm/memory_hotplug.c | 11 +++++++ 9 files changed, 78 insertions(+), 39 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index bf19513f0ce..46859253179 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -654,7 +654,7 @@ void __init mem_init(void) */ #ifdef CONFIG_MEMORY_HOTPLUG #ifndef CONFIG_NEED_MULTIPLE_NODES -int add_memory(u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata = &contig_page_data; struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 11f08001f8c..38306e98f04 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -652,7 +652,7 @@ void online_page(struct page *page) num_physpages++; } -int add_memory(u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat; struct zone *zone; @@ -660,7 +660,7 @@ int add_memory(u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - pgdat = NODE_DATA(0); + pgdat = NODE_DATA(nid); zone = pgdat->node_zones + ZONE_NORMAL; ret = __add_pages(zone, start_pfn, nr_pages); @@ -671,7 +671,6 @@ int add_memory(u64 start, u64 size) return ret; } -EXPORT_SYMBOL_GPL(add_memory); int remove_memory(u64 start, u64 size) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 69f3b9a20be..089d939a0b3 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -114,15 +114,20 @@ void online_page(struct page *page) num_physpages++; } -int __devinit add_memory(u64 start, u64 size) +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + return hot_add_scn_to_nid(start); +} +#endif + +int __devinit arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata; struct zone *zone; - int nid; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - nid = hot_add_scn_to_nid(start); pgdata = NODE_DATA(nid); start = (unsigned long)__va(start); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 02add1d1dfa..51fbf3eddf1 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -506,8 +506,6 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) /* * Memory hotplug specific functions */ -#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE) - void online_page(struct page *page) { ClearPageReserved(page); @@ -517,31 +515,17 @@ void online_page(struct page *page) num_physpages++; } -#ifndef CONFIG_MEMORY_HOTPLUG +#ifdef CONFIG_MEMORY_HOTPLUG /* - * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, - * just online the pages. + * XXX: memory_add_physaddr_to_nid() is to find node id from physical address + * via probe interface of sysfs. If acpi notifies hot-add event, then it + * can tell node id by searching dsdt. But, probe interface doesn't have + * node id. So, return 0 as node id at this time. */ -int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) { - int err = -EIO; - unsigned long pfn; - unsigned long total = 0, mem = 0; - for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { - if (pfn_valid(pfn)) { - online_page(pfn_to_page(pfn)); - err = 0; - mem++; - } - total++; - } - if (!err) { - z->spanned_pages += total; - z->present_pages += mem; - z->zone_pgdat->node_spanned_pages += total; - z->zone_pgdat->node_present_pages += mem; - } - return err; + return 0; } #endif @@ -549,9 +533,9 @@ int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. */ -int add_memory(u64 start, u64 size) +int arch_add_memory(int nid, u64 start, u64 size) { - struct pglist_data *pgdat = NODE_DATA(0); + struct pglist_data *pgdat = NODE_DATA(nid); struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -568,7 +552,7 @@ error: printk("%s: Problem encountered in __add_pages!\n", __func__); return ret; } -EXPORT_SYMBOL_GPL(add_memory); +EXPORT_SYMBOL_GPL(arch_add_memory); int remove_memory(u64 start, u64 size) { @@ -576,7 +560,33 @@ int remove_memory(u64 start, u64 size) } EXPORT_SYMBOL_GPL(remove_memory); -#endif +#else /* CONFIG_MEMORY_HOTPLUG */ +/* + * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, + * just online the pages. + */ +int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) +{ + int err = -EIO; + unsigned long pfn; + unsigned long total = 0, mem = 0; + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (pfn_valid(pfn)) { + online_page(pfn_to_page(pfn)); + err = 0; + mem++; + } + total++; + } + if (!err) { + z->spanned_pages += total; + z->present_pages += mem; + z->zone_pgdat->node_spanned_pages += total; + z->zone_pgdat->node_present_pages += mem; + } + return err; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 94b8d820c51..610d2cc02cf 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -328,7 +328,7 @@ config ACPI_CONTAINER config ACPI_HOTPLUG_MEMORY tristate "Memory Hotplug" depends on ACPI - depends on MEMORY_HOTPLUG || X86_64 + depends on MEMORY_HOTPLUG default n help This driver adds supports for ACPI Memory Hotplug. This driver diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 5652569b376..0424326eae1 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -215,6 +215,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { int result, num_enabled = 0; struct acpi_memory_info *info; + int node = 0; ACPI_FUNCTION_TRACE("acpi_memory_enable_device"); @@ -245,7 +246,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) continue; } - result = add_memory(info->start_addr, info->length); + result = add_memory(node, info->start_addr, info->length); if (result) continue; info->enabled = 1; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index dd547af4681..c6b7d9c4b65 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -306,11 +306,13 @@ static ssize_t memory_probe_store(struct class *class, const char *buf, size_t count) { u64 phys_addr; + int nid; int ret; phys_addr = simple_strtoull(buf, NULL, 0); - ret = add_memory(phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); + nid = memory_add_physaddr_to_nid(phys_addr); + ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); if (ret) count = ret; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 91120638617..29c1472efad 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -63,6 +63,16 @@ extern int online_pages(unsigned long, unsigned long); /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); + +#ifdef CONFIG_NUMA +extern int memory_add_physaddr_to_nid(u64 start); +#else +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +#endif + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off @@ -99,7 +109,8 @@ static inline int __remove_pages(struct zone *zone, unsigned long start_pfn, return -ENOSYS; } -extern int add_memory(u64 start, u64 size); +extern int add_memory(int nid, u64 start, u64 size); +extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 841a077d5ae..6cdeabe9f6d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -163,3 +163,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) vm_total_pages = nr_free_pagecache_pages(); return 0; } + +int add_memory(int nid, u64 start, u64 size) +{ + int ret; + + /* call arch's memory hotadd */ + ret = arch_add_memory(nid, start, size); + + return ret; +} +EXPORT_SYMBOL_GPL(add_memory); -- cgit v1.2.3-70-g09d2 From 0fc44159bfcb5b0afa178f9c3f50db23aebc76ff Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Tue, 27 Jun 2006 02:53:38 -0700 Subject: [PATCH] Register sysfs file for hotplugged new node When new node becomes enable by hot-add, new sysfs file must be created for new node. So, if new node is enabled by add_memory(), register_one_node() is called to create it. In addition, I386's arch_register_node() and a part of register_nodes() of powerpc are consolidated to register_one_node() as a generic_code(). This is tested by Tiger4(IPF) with node hot-plug emulation. Signed-off-by: Keiichiro Tokunaga Signed-off-by: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/topology.c | 9 +++------ arch/ia64/kernel/topology.c | 15 +++------------ arch/powerpc/kernel/sysfs.c | 15 ++------------- drivers/base/node.c | 25 +++++++++++++++++++++++++ include/asm-i386/cpu.h | 2 -- include/asm-i386/node.h | 29 ----------------------------- include/linux/node.h | 4 ++++ mm/memory_hotplug.c | 12 +++++++++++- 8 files changed, 48 insertions(+), 63 deletions(-) delete mode 100644 include/asm-i386/node.h (limited to 'arch/i386') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 296355292c7..1eecc2e1bd4 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -38,7 +38,7 @@ int arch_register_cpu(int num){ #ifdef CONFIG_NUMA int node = cpu_to_node(num); if (node_online(node)) - parent = &node_devices[node].node; + parent = &node_devices[parent_node(node)]; #endif /* CONFIG_NUMA */ /* @@ -61,7 +61,7 @@ void arch_unregister_cpu(int num) { #ifdef CONFIG_NUMA int node = cpu_to_node(num); if (node_online(node)) - parent = &node_devices[node].node; + parent = &node_devices[parent_node(node)]; #endif /* CONFIG_NUMA */ return unregister_cpu(&cpu_devices[num].cpu, parent); @@ -74,16 +74,13 @@ EXPORT_SYMBOL(arch_unregister_cpu); #ifdef CONFIG_NUMA #include -#include - -struct i386_node node_devices[MAX_NUMNODES]; static int __init topology_init(void) { int i; for_each_online_node(i) - arch_register_node(i); + register_one_node(i); for_each_present_cpu(i) arch_register_cpu(i); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 879edb51d1e..42cb05bdc68 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -26,9 +26,6 @@ #include #include -#ifdef CONFIG_NUMA -static struct node *sysfs_nodes; -#endif static struct ia64_cpu *sysfs_cpus; int arch_register_cpu(int num) @@ -36,7 +33,7 @@ int arch_register_cpu(int num) struct node *parent = NULL; #ifdef CONFIG_NUMA - parent = &sysfs_nodes[cpu_to_node(num)]; + parent = &node_devices[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) @@ -59,7 +56,7 @@ void arch_unregister_cpu(int num) #ifdef CONFIG_NUMA int node = cpu_to_node(num); - parent = &sysfs_nodes[node]; + parent = &node_devices[node]; #endif /* CONFIG_NUMA */ return unregister_cpu(&sysfs_cpus[num].cpu, parent); @@ -74,17 +71,11 @@ static int __init topology_init(void) int i, err = 0; #ifdef CONFIG_NUMA - sysfs_nodes = kzalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL); - if (!sysfs_nodes) { - err = -ENOMEM; - goto out; - } - /* * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */ for_each_online_node(i) { - if ((err = register_node(&sysfs_nodes[i], i, 0))) + if ((err = register_one_node(i))) goto out; } #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5bc2585c803..338491d1604 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -304,23 +304,12 @@ static struct notifier_block sysfs_cpu_nb = { /* NUMA stuff */ #ifdef CONFIG_NUMA -static struct node node_devices[MAX_NUMNODES]; - static void register_nodes(void) { int i; - for (i = 0; i < MAX_NUMNODES; i++) { - if (node_online(i)) { - int p_node = parent_node(i); - struct node *parent = NULL; - - if (p_node != i) - parent = &node_devices[p_node]; - - register_node(&node_devices[i], i, parent); - } - } + for (i = 0; i < MAX_NUMNODES; i++) + register_one_node(i); } int sysfs_add_device_to_node(struct sys_device *dev, int nid) diff --git a/drivers/base/node.c b/drivers/base/node.c index c80c3aeed00..cbd0f62b487 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -190,6 +190,31 @@ void unregister_node(struct node *node) sysdev_unregister(&node->sysdev); } +struct node node_devices[MAX_NUMNODES]; + +int register_one_node(int nid) +{ + int error = 0; + + if (node_online(nid)) { + int p_node = parent_node(nid); + struct node *parent = NULL; + + if (p_node != nid) + parent = &node_devices[p_node]; + + error = register_node(&node_devices[nid], nid, parent); + } + + return error; + +} + +void unregister_one_node(int nid) +{ + unregister_node(&node_devices[nid]); +} + static int __init register_node_type(void) { return sysdev_class_register(&node_class); diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h index e7252c216ca..b1bc7b1b64b 100644 --- a/include/asm-i386/cpu.h +++ b/include/asm-i386/cpu.h @@ -7,8 +7,6 @@ #include #include -#include - struct i386_cpu { struct cpu cpu; }; diff --git a/include/asm-i386/node.h b/include/asm-i386/node.h deleted file mode 100644 index e13c6ffa72a..00000000000 --- a/include/asm-i386/node.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _ASM_I386_NODE_H_ -#define _ASM_I386_NODE_H_ - -#include -#include -#include -#include -#include - -struct i386_node { - struct node node; -}; -extern struct i386_node node_devices[MAX_NUMNODES]; - -static inline int arch_register_node(int num){ - int p_node; - struct node *parent = NULL; - - if (!node_online(num)) - return 0; - p_node = parent_node(num); - - if (p_node != num) - parent = &node_devices[p_node].node; - - return register_node(&node_devices[num].node, num, parent); -} - -#endif /* _ASM_I386_NODE_H_ */ diff --git a/include/linux/node.h b/include/linux/node.h index 254dc3de650..1e5347527fa 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -26,8 +26,12 @@ struct node { struct sys_device sysdev; }; +extern struct node node_devices[]; + extern int register_node(struct node *, int, struct node *); extern void unregister_node(struct node *node); +extern int register_one_node(int nid); +extern void unregister_one_node(int nid); #define to_node(sys_device) container_of(sys_device, struct node, sysdev) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f13783e81eb..ea4038838b0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -256,9 +256,19 @@ int add_memory(int nid, u64 start, u64 size) if (ret < 0) goto error; - /* we online node here. we have no error path from here. */ + /* we online node here. we can't roll back from here. */ node_set_online(nid); + if (new_pgdat) { + ret = register_one_node(nid); + /* + * If sysfs file of new node can't create, cpu on the node + * can't be hot-added. There is no rollback way now. + * So, check by BUG_ON() to catch it reluctantly.. + */ + BUG_ON(ret); + } + /* register this memory as resource */ register_memory_resource(start, size); -- cgit v1.2.3-70-g09d2 From 76b67ed9dce69a6a329cdd66f94af1787f417b62 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 27 Jun 2006 02:53:41 -0700 Subject: [PATCH] node hotplug: register cpu: remove node struct With Goto-san's patch, we can add new pgdat/node at runtime. I'm now considering node-hot-add with cpu + memory on ACPI. I found acpi container, which describes node, could evaluate cpu before memory. This means cpu-hot-add occurs before memory hot add. In most part, cpu-hot-add doesn't depend on node hot add. But register_cpu(), which creates symbolic link from node to cpu, requires that node should be onlined before register_cpu(). When a node is onlined, its pgdat should be there. This patch-set holds off creating symbolic link from node to cpu until node is onlined. This removes node arguments from register_cpu(). Now, register_cpu() requires 'struct node' as its argument. But the array of struct node is now unified in driver/base/node.c now (By Goto's node hotplug patch). We can get struct node in generic way. So, this argument is not necessary now. This patch also guarantees add cpu under node only when node is onlined. It is necessary for node-hot-add vs. cpu-hot-add patch following this. Moreover, register_cpu calculates cpu->node_id by cpu_to_node() without regard to its 'struct node *root' argument. This patch removes it. Also modify callers of register_cpu()/unregister_cpu, whose args are changed by register-cpu-remove-node-struct patch. [Brice.Goglin@ens-lyon.org: fix it] Signed-off-by: KAMEZAWA Hiroyuki Cc: Yasunori Goto Cc: Ashok Raj Cc: Dave Hansen Signed-off-by: Brice Goglin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/setup.c | 2 +- arch/arm/kernel/setup.c | 2 +- arch/i386/kernel/topology.c | 23 ++++------------------- arch/ia64/kernel/topology.c | 17 ++--------------- arch/m32r/kernel/setup.c | 2 +- arch/mips/kernel/smp.c | 2 +- arch/parisc/kernel/topology.c | 3 +-- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/sysfs.c | 12 +----------- arch/ppc/kernel/setup.c | 2 +- arch/s390/kernel/smp.c | 2 +- arch/sh/kernel/setup.c | 2 +- arch/sh64/kernel/setup.c | 2 +- arch/sparc64/kernel/setup.c | 2 +- drivers/base/cpu.c | 18 ++++++++---------- drivers/base/node.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/cpu.h | 4 ++-- include/linux/node.h | 13 +++++++++++++ 18 files changed, 77 insertions(+), 69 deletions(-) (limited to 'arch/i386') diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 558b8336855..254c507a608 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -481,7 +481,7 @@ register_cpus(void) struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - register_cpu(p, i, NULL); + register_cpu(p, i); } return 0; } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 9fc9af88c60..093ccba0503 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -808,7 +808,7 @@ static int __init topology_init(void) int cpu; for_each_possible_cpu(cpu) - register_cpu(&per_cpu(cpu_data, cpu).cpu, cpu, NULL); + register_cpu(&per_cpu(cpu_data, cpu).cpu, cpu); return 0; } diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 1eecc2e1bd4..e2e281d4bcc 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -32,15 +32,8 @@ static struct i386_cpu cpu_devices[NR_CPUS]; -int arch_register_cpu(int num){ - struct node *parent = NULL; - -#ifdef CONFIG_NUMA - int node = cpu_to_node(num); - if (node_online(node)) - parent = &node_devices[parent_node(node)]; -#endif /* CONFIG_NUMA */ - +int arch_register_cpu(int num) +{ /* * CPU0 cannot be offlined due to several * restrictions and assumptions in kernel. This basically @@ -50,21 +43,13 @@ int arch_register_cpu(int num){ if (!num) cpu_devices[num].cpu.no_control = 1; - return register_cpu(&cpu_devices[num].cpu, num, parent); + return register_cpu(&cpu_devices[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU void arch_unregister_cpu(int num) { - struct node *parent = NULL; - -#ifdef CONFIG_NUMA - int node = cpu_to_node(num); - if (node_online(node)) - parent = &node_devices[parent_node(node)]; -#endif /* CONFIG_NUMA */ - - return unregister_cpu(&cpu_devices[num].cpu, parent); + return unregister_cpu(&cpu_devices[num].cpu); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 42cb05bdc68..5737c9a061e 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -30,12 +30,6 @@ static struct ia64_cpu *sysfs_cpus; int arch_register_cpu(int num) { - struct node *parent = NULL; - -#ifdef CONFIG_NUMA - parent = &node_devices[cpu_to_node(num)]; -#endif /* CONFIG_NUMA */ - #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* * If CPEI cannot be re-targetted, and this is @@ -45,21 +39,14 @@ int arch_register_cpu(int num) sysfs_cpus[num].cpu.no_control = 1; #endif - return register_cpu(&sysfs_cpus[num].cpu, num, parent); + return register_cpu(&sysfs_cpus[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU void arch_unregister_cpu(int num) { - struct node *parent = NULL; - -#ifdef CONFIG_NUMA - int node = cpu_to_node(num); - parent = &node_devices[node]; -#endif /* CONFIG_NUMA */ - - return unregister_cpu(&sysfs_cpus[num].cpu, parent); + return unregister_cpu(&sysfs_cpus[num].cpu); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 3cd3c2988a4..1ff483c8a4c 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -275,7 +275,7 @@ static int __init topology_init(void) int i; for_each_present_cpu(i) - register_cpu(&cpu_devices[i], i, NULL); + register_cpu(&cpu_devices[i], i); return 0; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 298f82fe844..9096a5ea422 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -446,7 +446,7 @@ static int __init topology_init(void) int ret; for_each_present_cpu(cpu) { - ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu, NULL); + ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " "failed (%d)\n", cpu, ret); diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index 3ba040050e4..068b20d822e 100644 --- a/arch/parisc/kernel/topology.c +++ b/arch/parisc/kernel/topology.c @@ -26,11 +26,10 @@ static struct cpu cpu_devices[NR_CPUS] __read_mostly; static int __init topology_init(void) { - struct node *parent = NULL; int num; for_each_present_cpu(num) { - register_cpu(&cpu_devices[num], num, parent); + register_cpu(&cpu_devices[num], num); } return 0; } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e5a44812441..0932a62a1c9 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -215,7 +215,7 @@ int __init ppc_init(void) /* register CPU devices */ for_each_possible_cpu(i) - register_cpu(&cpu_devices[i], i, NULL); + register_cpu(&cpu_devices[i], i); /* call platform init */ if (ppc_md.init != NULL) { diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 338491d1604..412ad00e222 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -348,23 +348,13 @@ static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { int cpu; - struct node *parent = NULL; register_nodes(); - register_cpu_notifier(&sysfs_cpu_nb); for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); -#ifdef CONFIG_NUMA - /* The node to which a cpu belongs can't be known - * until the cpu is made present. - */ - parent = NULL; - if (cpu_present(cpu)) - parent = &node_devices[cpu_to_node(cpu)]; -#endif /* * For now, we just see if the system supports making * the RTAS calls for CPU hotplug. But, there may be a @@ -376,7 +366,7 @@ static int __init topology_init(void) c->no_control = 1; if (cpu_online(cpu) || (c->no_control == 0)) { - register_cpu(c, cpu, parent); + register_cpu(c, cpu); sysdev_create_file(&c->sysdev, &attr_physical_id); } diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 1f79e84ab46..4b4607d89bf 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -475,7 +475,7 @@ int __init ppc_init(void) /* register CPU devices */ for_each_possible_cpu(i) - register_cpu(&cpu_devices[i], i, NULL); + register_cpu(&cpu_devices[i], i); /* call platform init */ if (ppc_md.init != NULL) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 343120c9223..8e03219eea7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -869,7 +869,7 @@ static int __init topology_init(void) int ret; for_each_possible_cpu(cpu) { - ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu, NULL); + ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " "failed (%d)\n", cpu, ret); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index bb229ef030f..9af22116c9a 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -402,7 +402,7 @@ static int __init topology_init(void) int cpu_id; for_each_possible_cpu(cpu_id) - register_cpu(&cpu[cpu_id], cpu_id, NULL); + register_cpu(&cpu[cpu_id], cpu_id); return 0; } diff --git a/arch/sh64/kernel/setup.c b/arch/sh64/kernel/setup.c index d2711c9c9d1..da98d8dbcf9 100644 --- a/arch/sh64/kernel/setup.c +++ b/arch/sh64/kernel/setup.c @@ -309,7 +309,7 @@ static struct cpu cpu[1]; static int __init topology_init(void) { - return register_cpu(cpu, 0, NULL); + return register_cpu(cpu, 0); } subsys_initcall(topology_init); diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index a6a7d816834..116d9632002 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -537,7 +537,7 @@ static int __init topology_init(void) for_each_possible_cpu(i) { struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); if (p) { - register_cpu(p, i, NULL); + register_cpu(p, i); err = 0; } } diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dd712b24ec9..3972d8ac978 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "base.h" @@ -57,13 +58,12 @@ static void __devinit register_cpu_control(struct cpu *cpu) { sysdev_create_file(&cpu->sysdev, &attr_online); } -void unregister_cpu(struct cpu *cpu, struct node *root) +void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->sysdev.id; - if (root) - sysfs_remove_link(&root->sysdev.kobj, - kobject_name(&cpu->sysdev.kobj)); + unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); + sysdev_remove_file(&cpu->sysdev, &attr_online); sysdev_unregister(&cpu->sysdev); @@ -109,23 +109,21 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); * * Initialize and register the CPU device. */ -int __devinit register_cpu(struct cpu *cpu, int num, struct node *root) +int __devinit register_cpu(struct cpu *cpu, int num) { int error; - cpu->node_id = cpu_to_node(num); cpu->sysdev.id = num; cpu->sysdev.cls = &cpu_sysdev_class; error = sysdev_register(&cpu->sysdev); - if (!error && root) - error = sysfs_create_link(&root->sysdev.kobj, - &cpu->sysdev.kobj, - kobject_name(&cpu->sysdev.kobj)); + if (!error && !cpu->no_control) register_cpu_control(cpu); if (!error) cpu_sys_devices[num] = &cpu->sysdev; + if (!error) + register_cpu_under_node(num, cpu_to_node(num)); #ifdef CONFIG_KEXEC if (!error) diff --git a/drivers/base/node.c b/drivers/base/node.c index cbd0f62b487..eae2bdc183b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -11,6 +11,7 @@ #include #include #include +#include static struct sysdev_class node_class = { set_kset_name("node"), @@ -192,9 +193,38 @@ void unregister_node(struct node *node) struct node node_devices[MAX_NUMNODES]; +/* + * register cpu under node + */ +int register_cpu_under_node(unsigned int cpu, unsigned int nid) +{ + if (node_online(nid)) { + struct sys_device *obj = get_cpu_sysdev(cpu); + if (!obj) + return 0; + return sysfs_create_link(&node_devices[nid].sysdev.kobj, + &obj->kobj, + kobject_name(&obj->kobj)); + } + + return 0; +} + +int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) +{ + if (node_online(nid)) { + struct sys_device *obj = get_cpu_sysdev(cpu); + if (obj) + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&obj->kobj)); + } + return 0; +} + int register_one_node(int nid) { int error = 0; + int cpu; if (node_online(nid)) { int p_node = parent_node(nid); @@ -204,6 +234,12 @@ int register_one_node(int nid) parent = &node_devices[p_node]; error = register_node(&node_devices[nid], nid, parent); + + /* link cpu under this node */ + for_each_present_cpu(cpu) { + if (cpu_to_node(cpu) == nid) + register_cpu_under_node(cpu, nid); + } } return error; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 08d50c53aab..b23bf1c8add 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -31,10 +31,10 @@ struct cpu { struct sys_device sysdev; }; -extern int register_cpu(struct cpu *, int, struct node *); +extern int register_cpu(struct cpu *cpu, int num); extern struct sys_device *get_cpu_sysdev(unsigned cpu); #ifdef CONFIG_HOTPLUG_CPU -extern void unregister_cpu(struct cpu *, struct node *); +extern void unregister_cpu(struct cpu *cpu); #endif struct notifier_block; diff --git a/include/linux/node.h b/include/linux/node.h index 1e5347527fa..81dcec84cd8 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -32,6 +32,19 @@ extern int register_node(struct node *, int, struct node *); extern void unregister_node(struct node *node); extern int register_one_node(int nid); extern void unregister_one_node(int nid); +#ifdef CONFIG_NUMA +extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); +extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); +#else +static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid) +{ + return 0; +} +static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) +{ + return 0; +} +#endif #define to_node(sys_device) container_of(sys_device, struct node, sysdev) -- cgit v1.2.3-70-g09d2 From bd9e0b74f52dbac6241643fadca2393808b14c7a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 27 Jun 2006 02:53:43 -0700 Subject: [PATCH] x86: cpu_init(): avoid GFP_KERNEL allocation while atomic The patch fixes two issues: 1. cpu_init is called with interrupt disabled. Allocating gdt table there isn't good at runtime. 2. gdt table page cause memory leak in CPU hotplug case. Signed-off-by: Shaohua Li Cc: Ashok Raj Cc: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 8 +++++++- arch/i386/kernel/smpboot.c | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 44f2c5f2dda..640364d9b66 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -613,6 +613,12 @@ void __cpuinit cpu_init(void) set_in_cr4(X86_CR4_TSD); } + /* The CPU hotplug case */ + if (cpu_gdt_descr->address) { + gdt = (struct desc_struct *)cpu_gdt_descr->address; + memset(gdt, 0, PAGE_SIZE); + goto old_gdt; + } /* * This is a horrible hack to allocate the GDT. The problem * is that cpu_init() is called really early for the boot CPU @@ -631,7 +637,7 @@ void __cpuinit cpu_init(void) local_irq_enable(); } } - +old_gdt: /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bce5470ecb4..9466a3c9ff0 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1056,6 +1056,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { @@ -1063,6 +1064,18 @@ static int __cpuinit __smp_prepare_cpu(int cpu) goto exit; } + /* + * the CPU isn't initialized at boot time, allocate gdt table here. + * cpu_init will initialize it + */ + if (!cpu_gdt_descr->address) { + cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); + if (!cpu_gdt_descr->address) + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); + ret = -ENOMEM; + goto exit; + } + info.complete = &done; info.apicid = apicid; info.cpu = cpu; -- cgit v1.2.3-70-g09d2 From 19eadf98c8167eac843580683317b99572e2abf0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 27 Jun 2006 02:53:44 -0700 Subject: [PATCH] x86: increase interrupt vector range Remove the limit of 256 interrupt vectors by changing the value stored in orig_{e,r}ax to be the complemented interrupt vector. The orig_{e,r}ax needs to be < 0 to allow the signal code to distinguish between return from interrupt and return from syscall. With this change applied, NR_IRQS can be > 256. Xen extends the IRQ numbering space to include room for dynamically allocated virtual interrupts (in the range 256-511), which requires a more permissive interface to do_IRQ. Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright Signed-off-by: Rusty Russell Cc: "Protasevich, Natalie" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 4 ++-- arch/i386/kernel/irq.c | 4 ++-- arch/x86_64/kernel/entry.S | 2 +- arch/x86_64/kernel/irq.c | 4 ++-- arch/x86_64/kernel/smp.c | 4 ++-- include/asm-x86_64/hw_irq.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e6e4506e749..8713e0248a0 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -515,7 +515,7 @@ ENTRY(irq_entries_start) .if vector CFI_ADJUST_CFA_OFFSET -4 .endif -1: pushl $vector-256 +1: pushl $~(vector) CFI_ADJUST_CFA_OFFSET 4 jmp common_interrupt .data @@ -535,7 +535,7 @@ common_interrupt: #define BUILD_INTERRUPT(name, nr) \ ENTRY(name) \ RING0_INT_FRAME; \ - pushl $nr-256; \ + pushl $~(nr); \ CFI_ADJUST_CFA_OFFSET 4; \ SAVE_ALL; \ movl %esp,%eax; \ diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 061533e0cb5..586b15f6741 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; */ fastcall unsigned int do_IRQ(struct pt_regs *regs) { - /* high bits used in ret_from_ code */ - int irq = regs->orig_eax & 0xff; + /* high bit used in ret_from_ code */ + int irq = ~regs->orig_eax; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; u32 *isp; diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7290e72b9a3..22cac4487b5 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -588,7 +588,7 @@ END(common_interrupt) */ .macro apicinterrupt num,func INTR_FRAME - pushq $\num-256 + pushq $~(\num) CFI_ADJUST_CFA_OFFSET 8 interrupt \func jmp ret_from_intr diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 59518d4d435..3be0a7e4bf0 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -115,8 +115,8 @@ skip: */ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { - /* high bits used in ret_from_ code */ - unsigned irq = regs->orig_rax & 0xff; + /* high bit used in ret_from_ code */ + unsigned irq = ~regs->orig_rax; exit_idle(); irq_enter(); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index acee4bc3f6f..5a1c0a3bf87 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) cpu = smp_processor_id(); /* - * orig_rax contains the interrupt vector - 256. + * orig_rax contains the negated interrupt vector. * Use that to determine where the sender put the data. */ - sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START; + sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); if (!cpu_isset(cpu, f->flush_cpumask)) diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 1b2ac55d320..93187746278 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -124,7 +124,7 @@ asmlinkage void IRQ_NAME(nr); \ __asm__( \ "\n.p2align\n" \ "IRQ" #nr "_interrupt:\n\t" \ - "push $" #nr "-256 ; " \ + "push $~(" #nr ") ; " \ "jmp common_interrupt"); #if defined(CONFIG_X86_IO_APIC) -- cgit v1.2.3-70-g09d2 From 7f35bf929ffe2b3967c8f398880fcb78fcd2ab5c Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Tue, 27 Jun 2006 02:53:45 -0700 Subject: [PATCH] x86: constify some parts of arch/i386/kernel/cpu/ Signed-off-by: Andreas Mohr Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 6 +++--- arch/i386/kernel/cpu/proc.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 6c37b4fd8ce..f24a0145024 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -159,13 +159,13 @@ union l2_cache { unsigned val; }; -static unsigned short assocs[] = { +static const unsigned short assocs[] = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xf] = 0xffff // ?? }; -static unsigned char levels[] = { 1, 1, 2 }; -static unsigned char types[] = { 1, 2, 3 }; +static const unsigned char levels[] = { 1, 1, 2 }; +static const unsigned char types[] = { 1, 2, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index a19fcb262db..4b03f9f5832 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -18,7 +18,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) * applications want to get the raw CPUID data, they should access * /dev/cpu//cpuid instead. */ - static char *x86_cap_flags[] = { + static const char * const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -62,7 +62,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char * const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ -- cgit v1.2.3-70-g09d2 From 4b89aff930d632be10d557d08d1b60dee7163dbe Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Tue, 27 Jun 2006 02:53:46 -0700 Subject: [PATCH] i386: move phys_proc_id and cpu_core_id to cpuinfo_x86 Move the phys_core_id and cpu_core_id to cpuinfo_x86 structure. Similar patch for x86_64 is already accepted by Andi earlier this week. [akpm@osdl.org: fix warning] Signed-off-by: Rohit Seth Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 6 +++--- arch/i386/kernel/cpu/common.c | 15 +++++++-------- arch/i386/kernel/cpu/proc.c | 4 ++-- arch/i386/kernel/smpboot.c | 16 +++++----------- include/asm-i386/processor.h | 8 +++++--- include/asm-i386/topology.h | 6 ++---- 6 files changed, 24 insertions(+), 31 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index fd0457c9c82..e6a2d6b80cd 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -235,10 +235,10 @@ static void __init init_amd(struct cpuinfo_x86 *c) while ((1 << bits) < c->x86_max_cores) bits++; } - cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<>= bits; + c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, c->cpu_core_id); } #endif diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 640364d9b66..2fa401f19f3 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -319,7 +319,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) early_intel_workaround(c); #ifdef CONFIG_X86_HT - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif } @@ -477,11 +477,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; - int cpu = smp_processor_id(); cpuid(1, &eax, &ebx, &ecx, &edx); - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -492,16 +490,17 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1 ) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } index_msb = get_count_order(smp_num_siblings); - phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); + c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -509,12 +508,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & ((1 << core_bits) - 1); if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", - cpu_core_id[cpu]); + c->cpu_core_id); } } #endif diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 4b03f9f5832..f54a15268ed 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -109,9 +109,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); - seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9466a3c9ff0..ab5275beddf 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -67,12 +67,6 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); #endif -/* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; - -/* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; - /* Last level cache ID of each logical CPU */ int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; @@ -473,8 +467,8 @@ set_cpu_sibling_map(int cpu) if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (phys_proc_id[cpu] == phys_proc_id[i] && - cpu_core_id[cpu] == cpu_core_id[i]) { + if (c[cpu].phys_proc_id == c[i].phys_proc_id && + c[cpu].cpu_core_id == c[i].cpu_core_id) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); @@ -501,7 +495,7 @@ set_cpu_sibling_map(int cpu) cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } - if (phys_proc_id[cpu] == phys_proc_id[i]) { + if (c[cpu].phys_proc_id == c[i].phys_proc_id) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); /* @@ -1353,8 +1347,8 @@ remove_siblinginfo(int cpu) cpu_clear(cpu, cpu_sibling_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); - phys_proc_id[cpu] = BAD_APICID; - cpu_core_id[cpu] = BAD_APICID; + c[cpu].phys_proc_id = 0; + c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 55ea992da32..b32346d62e1 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -71,8 +71,12 @@ struct cpuinfo_x86 { cpumask_t llc_shared_map; /* cpus sharing the last level cache */ #endif unsigned char x86_max_cores; /* cpuid returned max cores value */ - unsigned char booted_cores; /* number of cores as seen by OS */ unsigned char apicid; +#ifdef CONFIG_SMP + unsigned char booted_cores; /* number of cores as seen by OS */ + __u8 phys_proc_id; /* Physical processor id. */ + __u8 cpu_core_id; /* Core id */ +#endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -104,8 +108,6 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif -extern int phys_proc_id[NR_CPUS]; -extern int cpu_core_id[NR_CPUS]; extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index b94e5eeef91..aa4185ee81f 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -28,10 +28,8 @@ #define _ASM_I386_TOPOLOGY_H #ifdef CONFIG_X86_HT -#define topology_physical_package_id(cpu) \ - (phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu]) -#define topology_core_id(cpu) \ - (cpu_core_id[cpu] == BAD_APICID ? 0 : cpu_core_id[cpu]) +#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) +#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) #endif -- cgit v1.2.3-70-g09d2 From 4031ff388138b58e5cd472dccce38828bcb8c706 Mon Sep 17 00:00:00 2001 From: Aleksey Gorelov Date: Tue, 27 Jun 2006 02:53:48 -0700 Subject: [PATCH] fix broken vm86 interrupt/signal handling Commit c3ff8ec31c1249d268cd11390649768a12bec1b9 ("[PATCH] i386: Don't miss pending signals returning to user mode after signal processing") meant that vm86 interrupt/signal handling got broken for the case when vm86 is called from kernel space. In this scenario, if signal is pending because of vm86 interrupt, do_notify_resume/do_signal exits immediately due to user_mode() check, without processing any signals. Thus, resume_userspace handler is spinning in a tight loop with signal pending and TIF_SIGPENDING is set. Previously everything worked Ok. No in-tree usage of vm86() from kernel space exists, but I've heard about a number of projects out there which use vm86 calls from kernel, one of them being this, for instance: http://dev.gentoo.org/~spock/projects/vesafb-tng/ The following patch fixes the issue. Signed-off-by: Aleksey Gorelov Cc: Atsushi Nemoto Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 8713e0248a0..e8d2630fd19 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -83,6 +83,12 @@ VM_MASK = 0x00020000 #define resume_kernel restore_nocheck #endif +#ifdef CONFIG_VM86 +#define resume_userspace_sig check_userspace +#else +#define resume_userspace_sig resume_userspace +#endif + #define SAVE_ALL \ cld; \ pushl %es; \ @@ -211,6 +217,7 @@ ret_from_exception: preempt_stop ret_from_intr: GET_THREAD_INFO(%ebp) +check_userspace: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al testl $(VM_MASK | 3), %eax @@ -415,7 +422,7 @@ work_notifysig: # deal with pending signals and # vm86-space xorl %edx, %edx call do_notify_resume - jmp resume_userspace + jmp resume_userspace_sig ALIGN work_notifysig_v86: @@ -428,7 +435,7 @@ work_notifysig_v86: movl %eax, %esp xorl %edx, %edx call do_notify_resume - jmp resume_userspace + jmp resume_userspace_sig #endif # perform syscall exit tracing -- cgit v1.2.3-70-g09d2 From 96c52749036eca2b131f435d3895a3c6aba92e76 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 27 Jun 2006 02:53:49 -0700 Subject: [PATCH] fix subarchitecture breakage with CONFIG_SCHED_SMT Commit 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 ("[PATCH] sched: new sched domain for representing multi-core") incorrectly made SCHED_SMT and some of the structures it uses dependent on SMP. However, this is wrong, the structures are only defined if X86_HT, so SCHED_SMT has to depend on that as well. The patch broke voyager, since it doesn't provide any of the multi-core or hyperthreading structures. Signed-off-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 4 ++-- arch/i386/kernel/cpu/common.c | 2 +- arch/i386/kernel/cpu/intel_cacheinfo.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 47c08bcd9b2..6662f8c4479 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -233,7 +233,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" - depends on SMP + depends on X86_HT help SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a @@ -242,7 +242,7 @@ config SCHED_SMT config SCHED_MC bool "Multi-core scheduler support" - depends on SMP + depends on X86_HT default y help Multi-core scheduler support improves the CPU scheduler's decision diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 2fa401f19f3..70c87de582c 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -294,7 +294,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); #else c->apicid = (ebx >> 24) & 0xFF; diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index f24a0145024..8a92642ea59 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -261,7 +261,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); #endif @@ -383,14 +383,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT cpu_llc_id[cpu] = l2_id; #endif } if (new_l3) { l3 = new_l3; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT cpu_llc_id[cpu] = l3_id; #endif } -- cgit v1.2.3-70-g09d2 From d5fb34261dcd32c9cb3b28121fdc46308db513a1 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 27 Jun 2006 02:53:50 -0700 Subject: [PATCH] voyager: fix compile after setup rework The following [PATCH] Clean up and refactor i386 sub-architecture setup Doesn't quite work, since it leaves out an include of asm/io.h, without which the use of inb/outb in the setup file won.t work. This corrects that and also removes a spurious acpi reference that apparently crept in ages ago but should never have been there. Signed-off-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mach-voyager/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c index 0e225054e22..defc6ebbd56 100644 --- a/arch/i386/mach-voyager/setup.c +++ b/arch/i386/mach-voyager/setup.c @@ -5,10 +5,10 @@ #include #include #include -#include #include #include #include +#include #include void __init pre_intr_init_hook(void) @@ -27,8 +27,7 @@ void __init intr_init_hook(void) smp_intr_init(); #endif - if (!acpi_ioapic) - setup_irq(2, &irq2); + setup_irq(2, &irq2); } void __init pre_setup_arch_hook(void) -- cgit v1.2.3-70-g09d2 From e6e5494cb23d1933735ee47cc674ffe1c4afed6f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jun 2006 02:53:50 -0700 Subject: [PATCH] vdso: randomize the i386 vDSO by moving it into a vma Move the i386 VDSO down into a vma and thus randomize it. Besides the security implications, this feature also helps debuggers, which can COW a vma-backed VDSO just like a normal DSO and can thus do single-stepping and other debugging features. It's good for hypervisors (Xen, VMWare) too, which typically live in the same high-mapped address space as the VDSO, hence whenever the VDSO is used, they get lots of guest pagefaults and have to fix such guest accesses up - which slows things down instead of speeding things up (the primary purpose of the VDSO). There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support for older glibcs that still rely on a prelinked high-mapped VDSO. Newer distributions (using glibc 2.3.3 or later) can turn this option off. Turning it off is also recommended for security reasons: attackers cannot use the predictable high-mapped VDSO page as syscall trampoline anymore. There is a new vdso=[0|1] boot option as well, and a runtime /proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned on/off. (This version of the VDSO-randomization patch also has working ELF coredumping, the previous patch crashed in the coredumping code.) This code is a combined work of the exec-shield VDSO randomization code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell started this patch and i completed it. [akpm@osdl.org: cleanups] [akpm@osdl.org: compile fix] [akpm@osdl.org: compile fix 2] [akpm@osdl.org: compile fix 3] [akpm@osdl.org: revernt MAXMEM change] Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Cc: Gerd Hoffmann Cc: Rusty Russell Cc: Zachary Amsden Cc: Andi Kleen Cc: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 4 ++ arch/i386/Kconfig | 11 +++ arch/i386/kernel/asm-offsets.c | 4 +- arch/i386/kernel/entry.S | 7 +- arch/i386/kernel/signal.c | 4 +- arch/i386/kernel/sysenter.c | 128 +++++++++++++++++++++++++++++++++-- arch/i386/kernel/vsyscall-sysenter.S | 4 +- arch/i386/kernel/vsyscall.lds.S | 4 +- fs/proc/task_mmu.c | 30 ++++---- include/asm-i386/elf.h | 53 +++++++++++---- include/asm-i386/fixmap.h | 10 +-- include/asm-i386/mmu.h | 1 + include/asm-i386/page.h | 3 + include/asm-i386/thread_info.h | 1 + include/asm-i386/unwind.h | 4 +- include/linux/mm.h | 2 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 12 ++++ 18 files changed, 235 insertions(+), 48 deletions(-) (limited to 'arch/i386') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2e352a605fc..0d189c93eea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1669,6 +1669,10 @@ running once the system is up. usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. + vdso= [IA-32] + vdso=1: enable VDSO (default) + vdso=0: disable VDSO mapping + video= [FB] Frame buffer configuration See Documentation/fb/modedb.txt. diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6662f8c4479..3bb221db164 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -780,6 +780,17 @@ config HOTPLUG_CPU enable suspend on SMP systems. CPUs can be controlled through /sys/devices/system/cpu. +config COMPAT_VDSO + bool "Compat VDSO support" + default y + help + Map the VDSO to the predictable old-style address too. + ---help--- + Say N here if you are running a sufficiently recent glibc + version (2.3.3 or later), to remove the high-mapped + VDSO mapping and to exclusively use the randomized VDSO. + + If unsure, say Y. endmenu diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1c3a809e642..c80271f8f08 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -14,6 +14,7 @@ #include #include #include +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -54,6 +55,7 @@ void foo(void) OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); + OFFSET(TI_sysenter_return, thread_info, sysenter_return); BLANK(); OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); @@ -69,7 +71,7 @@ void foo(void) sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); - DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e8d2630fd19..fbdb933251b 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -270,7 +270,12 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ - pushl $SYSENTER_RETURN + /* + * Push current_thread_info()->sysenter_return to the stack. + * A tiny bit of offset fixup is necessary - 4*4 means the 4 words + * pushed above; +8 corresponds to copy_thread's esp0 setting. + */ + pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 5c352c3a9e7..43002cfb40c 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, goto give_sigsegv; } - restorer = &__kernel_sigreturn; + restorer = (void *)VDSO_SYM(&__kernel_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Set up to return from userspace. */ - restorer = &__kernel_rt_sigreturn; + restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0bada1870bd..c60419dee01 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -2,6 +2,8 @@ * linux/arch/i386/kernel/sysenter.c * * (C) Copyright 2002 Linus Torvalds + * Portions based on the vdso-randomization code from exec-shield: + * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar * * This file contains the needed initializations to support sysenter. */ @@ -13,12 +15,31 @@ #include #include #include +#include +#include #include #include #include #include +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; + +EXPORT_SYMBOL_GPL(vdso_enabled); + +static int __init vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + + return 1; +} + +__setup("vdso=", vdso_setup); + extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void) @@ -45,23 +66,122 @@ void enable_sep_cpu(void) */ extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; +static void *syscall_page; int __init sysenter_setup(void) { - void *page = (void *)get_zeroed_page(GFP_ATOMIC); + syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); - __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); +#ifdef CONFIG_COMPAT_VDSO + __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); + printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); +#else + /* + * In the non-compat case the ELF coredumping code needs the fixmap: + */ + __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); +#endif if (!boot_cpu_has(X86_FEATURE_SEP)) { - memcpy(page, + memcpy(syscall_page, &vsyscall_int80_start, &vsyscall_int80_end - &vsyscall_int80_start); return 0; } - memcpy(page, + memcpy(syscall_page, &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); return 0; } + +static struct page *syscall_nopage(struct vm_area_struct *vma, + unsigned long adr, int *type) +{ + struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); + get_page(p); + return p; +} + +/* Prevent VMA merging */ +static void syscall_vma_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct syscall_vm_ops = { + .close = syscall_vma_close, + .nopage = syscall_nopage, +}; + +/* Defined in vsyscall-sysenter.S */ +extern void SYSENTER_RETURN; + +/* Setup a VMA at program startup for the vsyscall page */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) { + ret = -ENOMEM; + goto up_fail; + } + + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + /* MAYWRITE to allow gdb to COW and set breakpoints */ + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall_vm_ops; + vma->vm_mm = mm; + + ret = insert_vm_struct(mm, vma); + if (ret) + goto free_vma; + + current->mm->context.vdso = (void *)addr; + current_thread_info()->sysenter_return = + (void *)VDSO_SYM(&SYSENTER_RETURN); + mm->total_vm++; +up_fail: + up_write(&mm->mmap_sem); + return ret; + +free_vma: + kmem_cache_free(vm_area_cachep, vma); + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + return "[vdso]"; + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 3b62baa6a37..1a36d26e15e 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -42,10 +42,10 @@ __kernel_vsyscall: /* 7: align return point with nop's to make disassembly easier */ .space 7,0x90 - /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ + /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ jmp .Lenter_kernel /* 16: System call normal return point is here! */ - .globl SYSENTER_RETURN /* Symbol used by entry.S. */ + .globl SYSENTER_RETURN /* Symbol used by sysenter.c */ SYSENTER_RETURN: pop %ebp .Lpop_ebp: diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index 98699ca6e52..e26975fc68b 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S @@ -7,7 +7,7 @@ SECTIONS { - . = VSYSCALL_BASE + SIZEOF_HEADERS; + . = VDSO_PRELINK + SIZEOF_HEADERS; .hash : { *(.hash) } :text .dynsym : { *(.dynsym) } @@ -20,7 +20,7 @@ SECTIONS For the layouts to match, we need to skip more than enough space for the dynamic symbol table et al. If this amount is insufficient, ld -shared will barf. Just increase it here. */ - . = VSYSCALL_BASE + 0x400; + . = VDSO_PRELINK + 0x400; .text : { *(.text) } :text =0x90909090 .note : { *(.note.*) } :text :note diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 0137ec4c136..0a163a4f776 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -122,6 +122,11 @@ struct mem_size_stats unsigned long private_dirty; }; +__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct proc_maps_private *priv = m->private; @@ -158,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats pad_len_spaces(m, len); seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); } else { - if (mm) { - if (vma->vm_start <= mm->start_brk && + const char *name = arch_vma_name(vma); + if (!name) { + if (mm) { + if (vma->vm_start <= mm->start_brk && vma->vm_end >= mm->brk) { - pad_len_spaces(m, len); - seq_puts(m, "[heap]"); - } else { - if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { - - pad_len_spaces(m, len); - seq_puts(m, "[stack]"); + name = "[heap]"; + } else if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { + name = "[stack]"; } + } else { + name = "[vdso]"; } - } else { + } + if (name) { pad_len_spaces(m, len); - seq_puts(m, "[vdso]"); + seq_puts(m, name); } } seq_putc(m, '\n'); diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 4153d80e4d2..1eac92cb5b1 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -10,6 +10,7 @@ #include #include /* for savesegment */ #include +#include #include @@ -129,15 +130,41 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) -#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) -#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) -#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) +#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) +#define VDSO_BASE ((unsigned long)current->mm->context.vdso) + +#ifdef CONFIG_COMPAT_VDSO +# define VDSO_COMPAT_BASE VDSO_HIGH_BASE +# define VDSO_PRELINK VDSO_HIGH_BASE +#else +# define VDSO_COMPAT_BASE VDSO_BASE +# define VDSO_PRELINK 0 +#endif + +#define VDSO_COMPAT_SYM(x) \ + (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + +#define VDSO_SYM(x) \ + (VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK) + +#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) + extern void __kernel_vsyscall; +#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); + +extern unsigned int vdso_enabled; + #define ARCH_DLINFO \ -do { \ - NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ } while (0) /* @@ -148,15 +175,15 @@ do { \ * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the vsyscall DSO was being used. */ -#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) +#define ELF_CORE_EXTRA_PHDRS (VDSO_HIGH_EHDR->e_phnum) #define ELF_CORE_WRITE_EXTRA_PHDRS \ do { \ const struct elf_phdr *const vsyscall_phdrs = \ - (const struct elf_phdr *) (VSYSCALL_BASE \ - + VSYSCALL_EHDR->e_phoff); \ + (const struct elf_phdr *) (VDSO_HIGH_BASE \ + + VDSO_HIGH_EHDR->e_phoff); \ int i; \ Elf32_Off ofs = 0; \ - for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \ struct elf_phdr phdr = vsyscall_phdrs[i]; \ if (phdr.p_type == PT_LOAD) { \ BUG_ON(ofs != 0); \ @@ -174,10 +201,10 @@ do { \ #define ELF_CORE_WRITE_EXTRA_DATA \ do { \ const struct elf_phdr *const vsyscall_phdrs = \ - (const struct elf_phdr *) (VSYSCALL_BASE \ - + VSYSCALL_EHDR->e_phoff); \ + (const struct elf_phdr *) (VDSO_HIGH_BASE \ + + VDSO_HIGH_EHDR->e_phoff); \ int i; \ - for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \ if (vsyscall_phdrs[i].p_type == PT_LOAD) \ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index f7e068f4d2f..a48cc3f7ccc 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -51,7 +51,7 @@ */ enum fixed_addresses { FIX_HOLE, - FIX_VSYSCALL, + FIX_VDSO, #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif @@ -115,14 +115,6 @@ extern void __set_fixmap (enum fixed_addresses idx, #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) -/* - * This is the range that is readable by user mode, and things - * acting like user mode such as get_user_pages. - */ -#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL)) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) - - extern void __this_fixmap_does_not_exist(void); /* diff --git a/include/asm-i386/mmu.h b/include/asm-i386/mmu.h index f431a0b86d4..8358dd3df7a 100644 --- a/include/asm-i386/mmu.h +++ b/include/asm-i386/mmu.h @@ -12,6 +12,7 @@ typedef struct { int size; struct semaphore sem; void *ldt; + void *vdso; } mm_context_t; #endif diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index e3a552fa553..f5bf544c729 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -96,6 +96,8 @@ typedef struct { unsigned long pgprot; } pgprot_t; #ifndef __ASSEMBLY__ +struct vm_area_struct; + /* * This much address space is reserved for vmalloc() and iomap() * as well as fixmap mappings. @@ -139,6 +141,7 @@ extern int page_is_ram(unsigned long pagenr); #include #include +#define __HAVE_ARCH_GATE_AREA 1 #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index ff1e2b1a7c8..2833fa2c0dd 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -37,6 +37,7 @@ struct thread_info { 0-0xBFFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ + void *sysenter_return; struct restart_block restart_block; unsigned long previous_esp; /* ESP of the previous stack in case diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index d480f2e3821..69f0f1df672 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -78,8 +78,8 @@ static inline int arch_unw_user_mode(const struct unwind_frame_info *info) return user_mode_vm(&info->regs); #else return info->regs.eip < PAGE_OFFSET - || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL) - && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE) + || (info->regs.eip >= __fix_to_virt(FIX_VDSO) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) || info->regs.esp < PAGE_OFFSET; #endif } diff --git a/include/linux/mm.h b/include/linux/mm.h index a929ea197e4..ff1fa87df8d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1065,5 +1065,7 @@ void drop_slab(void); extern int randomize_va_space; #endif +const char *arch_vma_name(struct vm_area_struct *vma); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 349ef908a22..bee12a7a057 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -189,6 +189,7 @@ enum VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ + VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f1a4eb1a655..f54afed8426 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -927,6 +927,18 @@ static ctl_table vm_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, +#endif +#ifdef CONFIG_X86_32 + { + .ctl_name = VM_VDSO_ENABLED, + .procname = "vdso_enabled", + .data = &vdso_enabled, + .maxlen = sizeof(vdso_enabled), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3-70-g09d2 From c9cf55285e87ac423c45d9efca750d3f50234d10 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 27 Jun 2006 02:53:52 -0700 Subject: [PATCH] add poison.h and patch primary users Localize poison values into one header file for better documentation and easier/quicker debugging and so that the same values won't be used for multiple purposes. Use these constants in core arch., mm, driver, and fs code. Signed-off-by: Randy Dunlap Acked-by: Matt Mackall Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/init.c | 3 ++- arch/powerpc/mm/init_64.c | 3 ++- arch/sparc64/mm/init.c | 3 ++- arch/x86_64/mm/init.c | 7 +++++-- drivers/base/dmapool.c | 3 +-- fs/jbd/journal.c | 3 ++- include/linux/list.h | 9 +-------- include/linux/poison.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ mm/slab.c | 12 +----------- 9 files changed, 61 insertions(+), 27 deletions(-) create mode 100644 include/linux/poison.h (limited to 'arch/i386') diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 46859253179..f84b16e007f 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -753,7 +754,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); - memset((void *)addr, 0xcc, PAGE_SIZE); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9e30f968c18..d454caada26 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -90,7 +91,7 @@ void free_initmem(void) addr = (unsigned long)__init_begin; for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, 0xcc, PAGE_SIZE); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); free_page(addr); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 5c2bcf354ce..cb75a27adb5 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1520,7 +1521,7 @@ void free_initmem(void) page = (addr + ((unsigned long) __va(kern_base)) - ((unsigned long) KERNBASE)); - memset((void *)addr, 0xcc, PAGE_SIZE); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); p = virt_to_page(page); ClearPageReserved(p); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 51fbf3eddf1..95bd232ff0c 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -660,7 +661,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); - memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); + memset((void *)(addr & ~(PAGE_SIZE-1)), + POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } @@ -668,7 +670,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) void free_initmem(void) { - memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); + memset(__initdata_begin, POISON_FREE_INITDATA, + __initdata_end - __initdata_begin); free_init_pages("unused kernel memory", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index e2f64f91ed0..33c5cce1560 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -7,6 +7,7 @@ #include #include #include +#include /* * Pool allocator ... wraps the dma_alloc_coherent page allocator, so @@ -35,8 +36,6 @@ struct dma_page { /* cacheable header for 'allocation' bytes */ }; #define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000) -#define POOL_POISON_FREED 0xa7 /* !inuse */ -#define POOL_POISON_ALLOCATED 0xa9 /* !initted */ static DECLARE_MUTEX (pools_lock); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 7f96b5cb678..8c9b28dff11 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1675,7 +1676,7 @@ static void journal_free_journal_head(struct journal_head *jh) { #ifdef CONFIG_JBD_DEBUG atomic_dec(&nr_journal_heads); - memset(jh, 0x5b, sizeof(*jh)); + memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif kmem_cache_free(journal_head_cache, jh); } diff --git a/include/linux/list.h b/include/linux/list.h index 37ca31b21bb..6b74adf5297 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -4,17 +4,10 @@ #ifdef __KERNEL__ #include +#include #include #include -/* - * These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. - */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) - /* * Simple doubly linked list implementation. * diff --git a/include/linux/poison.h b/include/linux/poison.h new file mode 100644 index 00000000000..d536de7dd82 --- /dev/null +++ b/include/linux/poison.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_POISON_H +#define _LINUX_POISON_H + +/********** include/linux/list.h **********/ +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/********** mm/slab.c **********/ +/* + * Magic nums for obj red zoning. + * Placed in the first word before and the first word after an obj. + */ +#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ +#define RED_ACTIVE 0x170FC2A5UL /* when obj is active */ + +/* ...and for poisoning */ +#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ +#define POISON_FREE 0x6b /* for use-after-free poisoning */ +#define POISON_END 0xa5 /* end-byte of poisoning */ + +/********** arch/$ARCH/mm/init.c **********/ +#define POISON_FREE_INITMEM 0xcc + +/********** arch/x86_64/mm/init.c **********/ +#define POISON_FREE_INITDATA 0xba + +/********** arch/ia64/hp/common/sba_iommu.c **********/ +/* + * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a + * value of "SBAIOMMU POISON\0" for spill-over poisoning. + */ + +/********** fs/jbd/journal.c **********/ +#define JBD_POISON_FREE 0x5b + +/********** drivers/base/dmapool.c **********/ +#define POOL_POISON_FREED 0xa7 /* !inuse */ +#define POOL_POISON_ALLOCATED 0xa9 /* !initted */ + +#endif diff --git a/mm/slab.c b/mm/slab.c index 98ac20bc0de..47982c2d9f3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -492,17 +493,6 @@ struct kmem_cache { #endif #if DEBUG -/* - * Magic nums for obj red zoning. - * Placed in the first word before and the first word after an obj. - */ -#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ -#define RED_ACTIVE 0x170FC2A5UL /* when obj is active */ - -/* ...and for poisoning */ -#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ -#define POISON_FREE 0x6b /* for use-after-free poisoning */ -#define POISON_END 0xa5 /* end-byte of poisoning */ /* * memory layout of objects: -- cgit v1.2.3-70-g09d2 From 9c7b216d23e820e0e148d5be01bbb5bd2d8378fe Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Tue, 27 Jun 2006 02:54:07 -0700 Subject: [PATCH] cpu hotplug: revert init patch submitted for 2.6.17 In 2.6.17, there was a problem with cpu_notifiers and XFS. I provided a band-aid solution to solve that problem. In the process, i undid all the changes you both were making to ensure that these notifiers were available only at init time (unless CONFIG_HOTPLUG_CPU is defined). We deferred the real fix to 2.6.18. Here is a set of patches that fixes the XFS problem cleanly and makes the cpu notifiers available only at init time (unless CONFIG_HOTPLUG_CPU is defined). If CONFIG_HOTPLUG_CPU is defined then cpu notifiers are available at run time. This patch reverts the notifier_call changes made in 2.6.17 Signed-off-by: Chandra Seetharaman Cc: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- arch/ia64/kernel/palinfo.c | 2 +- arch/ia64/kernel/salinfo.c | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/powerpc/kernel/sysfs.c | 2 +- arch/x86_64/kernel/mce.c | 2 +- drivers/base/topology.c | 2 +- drivers/cpufreq/cpufreq.c | 2 +- kernel/hrtimer.c | 2 +- kernel/profile.c | 2 +- kernel/rcupdate.c | 2 +- kernel/softirq.c | 2 +- kernel/softlockup.c | 2 +- kernel/timer.c | 2 +- kernel/workqueue.c | 2 +- mm/page_alloc.c | 2 +- mm/slab.c | 2 +- mm/vmscan.c | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 8a92642ea59..1d4ab104798 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -729,7 +729,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) return; } -static int cacheinfo_cpu_callback(struct notifier_block *nfb, +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 859fb37ff49..6386f63c413 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -959,7 +959,7 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -static int palinfo_cpu_callback(struct notifier_block *nfb, +static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 663a186ad19..9d5a823479a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -572,7 +572,7 @@ static struct file_operations salinfo_data_fops = { }; #ifdef CONFIG_HOTPLUG_CPU -static int +static int __devinit salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned int i, cpu = (unsigned long)hcpu; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5737c9a061e..f07c382b57b 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -404,7 +404,7 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) * When a cpu is hot-plugged, do a check and initiate * cache kobject if necessary */ -static int cache_cpu_callback(struct notifier_block *nfb, +static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 412ad00e222..0231869613c 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -279,7 +279,7 @@ static void unregister_cpu_online(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int sysfs_cpu_notify(struct notifier_block *self, +static int __devinit sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index acd5816b1a6..efe8500a5b9 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -629,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) #endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int +static __cpuinit int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 8c52421cbc5..915810f6237 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -107,7 +107,7 @@ static int __cpuinit topology_remove_dev(struct sys_device * sys_dev) return 0; } -static int topology_cpu_callback(struct notifier_block *nfb, +static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 44d1eca83a7..486ef666470 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1497,7 +1497,7 @@ int cpufreq_update_policy(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_update_policy); -static int cpufreq_cpu_callback(struct notifier_block *nfb, +static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 55601b3ce60..f9f53191661 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -833,7 +833,7 @@ static void migrate_hrtimers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int hrtimer_cpu_notify(struct notifier_block *self, +static int __devinit hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/profile.c b/kernel/profile.c index 68afe121e50..5a730fdb1a2 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -299,7 +299,7 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static int profile_cpu_callback(struct notifier_block *info, +static int __devinit profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c0e1cb95dd4..a8d80b7048b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -548,7 +548,7 @@ static void __devinit rcu_online_cpu(int cpu) tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); } -static int rcu_cpu_notify(struct notifier_block *self, +static int __devinit rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/softirq.c b/kernel/softirq.c index 9e2f1c6e73d..db65a311f14 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int cpu_callback(struct notifier_block *nfb, +static int __devinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index b5c3b94e01c..29da0a847ba 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu) /* * Create/destroy watchdog threads as CPUs come and go: */ -static int +static int __devinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; diff --git a/kernel/timer.c b/kernel/timer.c index 5bb6b7976ee..878194ec8bd 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1652,7 +1652,7 @@ static void __devinit migrate_timers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int timer_cpu_notify(struct notifier_block *self, +static int __devinit timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 565cf7a1feb..59f0b42bd89 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -559,7 +559,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) } /* We're holding the cpucontrol mutex here */ -static int workqueue_cpu_callback(struct notifier_block *nfb, +static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f86191bb63..e9fb2d4064c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2009,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu) } } -static int pageset_cpuup_callback(struct notifier_block *nfb, +static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/mm/slab.c b/mm/slab.c index 47982c2d9f3..631c0feb964 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1073,7 +1073,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) #endif -static int cpuup_callback(struct notifier_block *nfb, +static int __devinit cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { long cpu = (long)hcpu; diff --git a/mm/vmscan.c b/mm/vmscan.c index f03da33d914..eeacb0d695c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1450,7 +1450,7 @@ out: not required for correctness. So if the last cpu in a node goes away, we get changed to run anywhere: as the first one comes back, restore their cpu bindings. */ -static int cpu_callback(struct notifier_block *nfb, +static int __devinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { pg_data_t *pgdat; -- cgit v1.2.3-70-g09d2 From 74b85f3790aa2550c617fe14439482e13e615fa0 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Tue, 27 Jun 2006 02:54:09 -0700 Subject: [PATCH] cpu hotplug: make cpu_notifier related notifier blocks __cpuinit only Make notifier_blocks associated with cpu_notifier as __cpuinitdata. __cpuinitdata makes sure that the data is init time only unless CONFIG_HOTPLUG_CPU is defined. Signed-off-by: Chandra Seetharaman Cc: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- arch/i386/kernel/cpuid.c | 2 +- arch/i386/kernel/msr.c | 2 +- arch/ia64/kernel/palinfo.c | 4 ++-- arch/ia64/kernel/topology.c | 2 +- arch/powerpc/mm/numa.c | 11 ++++++----- arch/x86_64/kernel/mce.c | 2 +- drivers/base/topology.c | 2 +- drivers/cpufreq/cpufreq.c | 2 +- mm/page-writeback.c | 2 +- mm/page_alloc.c | 2 +- mm/slab.c | 4 +++- 12 files changed, 20 insertions(+), 17 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 1d4ab104798..e9f0b928b0a 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -747,7 +747,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block cacheinfo_cpu_notifier = +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 1d9a4abcdfc..f6dfa9fb675 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -183,7 +183,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac return NOTIFY_OK; } -static struct notifier_block cpuid_class_cpu_notifier = +static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 7a328230e54..d022cb8fd72 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -266,7 +266,7 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long acti return NOTIFY_OK; } -static struct notifier_block msr_class_cpu_notifier = +static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 6386f63c413..303a9afcf2a 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -959,7 +959,7 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, +static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -978,7 +978,7 @@ static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block palinfo_cpu_notifier = +static struct notifier_block __cpuinitdata palinfo_cpu_notifier = { .notifier_call = palinfo_cpu_callback, .priority = 0, diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f07c382b57b..5511d9c6c70 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -422,7 +422,7 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block cache_cpu_notifier = +static struct notifier_block __cpuinitdata cache_cpu_notifier = { .notifier_call = cache_cpu_callback }; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index aa98cb3b59d..fbe23933f73 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -334,7 +334,7 @@ out: return nid; } -static int cpu_numa_callback(struct notifier_block *nfb, +static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -609,14 +609,15 @@ static void __init *careful_allocation(int nid, unsigned long size, return (void *)ret; } +static struct notifier_block __cpuinitdata ppc64_numa_nb = { + .notifier_call = cpu_numa_callback, + .priority = 1 /* Must run before sched domains notifier. */ +}; + void __init do_init_bootmem(void) { int nid; unsigned int i; - static struct notifier_block ppc64_numa_nb = { - .notifier_call = cpu_numa_callback, - .priority = 1 /* Must run before sched domains notifier. */ - }; min_low_pfn = 0; max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index efe8500a5b9..88845674c66 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -647,7 +647,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block mce_cpu_notifier = { +static struct notifier_block __cpuinitdata mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 915810f6237..c2d62163238 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -125,7 +125,7 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block topology_cpu_notifier = +static struct notifier_block __cpuinitdata topology_cpu_notifier = { .notifier_call = topology_cpu_callback, }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3533e26f837..35e0b9ceecf 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1533,7 +1533,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block cpufreq_cpu_notifier = +static struct notifier_block __cpuinitdata cpufreq_cpu_notifier = { .notifier_call = cpufreq_cpu_callback, }; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8ccf6f1b147..8ac6bfb4007 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -523,7 +523,7 @@ ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) return 0; } -static struct notifier_block ratelimit_nb = { +static struct notifier_block __cpuinitdata ratelimit_nb = { .notifier_call = ratelimit_handler, .next = NULL, }; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e9fb2d4064c..dafd12ec7a0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2031,7 +2031,7 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, return ret; } -static struct notifier_block pageset_notifier = +static struct notifier_block __cpuinitdata pageset_notifier = { &pageset_cpuup_callback, NULL, 0 }; void __init setup_per_cpu_pageset(void) diff --git a/mm/slab.c b/mm/slab.c index 631c0feb964..d1d55279202 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1255,7 +1255,9 @@ bad: return NOTIFY_BAD; } -static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; +static struct notifier_block __cpuinitdata cpucache_notifier = { + &cpuup_callback, NULL, 0 +}; /* * swap the static kmem_list3 with kmalloced memory -- cgit v1.2.3-70-g09d2 From 62c83cde9282a9580994a12b3063e677181b5ebe Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:13 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: whitespace pre-clean GPIO SUPPORT FOR SCx200 & PC8736x The patch-set reworks the 2.4 vintage scx200_gpio driver for modern 2.6, and refactors GPIO support to reuse it in a new driver for the GPIO on PC-8736x chips. Its handy for the Soekris.com net-4801, which has both chips. These patches have been seen recently on Kernel-Mentors, and then Kernel-Newbies ML, where Jesper Juhl kindly reviewed it. His feedback has been incorporated. Thanks Jesper ! Its also gone to soekris-tech@soekris.com for possible testing by linux folks, I've gotten 1 promise so far. Theyre mostly BSD folk over there, but we'll see.. Device-file & Sysfs The driver preserves the existing device-file interface, including the write/cmd set, but adds v to 'view' the pin-settings & configs by inducing, via gpio_dump(), a dev_info() call. Its a fairly crappy way to get status, but it sticks to the syslog approach, conservatively. Allowing users to voluntarily trigger logging is good, it gives them a familiar way to confirm their app's control & use of the pins, and I've thus reduced the pin-mode-updates from dev_info to dev_dbg. I've recently bolted on a proto sysfs interface for both new drivers. Im not including those patches here; they (the patch + doc-pre-patch) are still quite raw (and unreviewed on KNML), and since they 'invent' a convention for GPIO, a proper vetting is needed. Since this patchset is much bigger than my previous ones, Id like to keep things simpler, and address it 1st, before bolting on more stuff. The driver-split The Geode CPU and the PC-87366 Super-IO chip have GPIO units which share a common pin-architecture (same pin features, with same bits controlling), but with different addressing mechanics and port organizations. The vintage driver expresses the pin capabilities with pin-mode commands [OoPpTt],etc that change the pin configurations, and since the 2 chips share pin-arch, we can reuse the read(), write() commands, once the implementation is suitably adjusted. The patchset adds a vtable: struct nsc_gpio_ops, to abstract the existing gpio operations, then adjusts fileops.write() code to invoke operations via that vtable. Driver specific open()s set private_data to the vtable so its available for use by write(). The vtable gets the gpio_dump() too, since its user-friendly, and (could be construed as) part of the current device-file interface. To support use of dev_dbg() in write() & _dump(), the vtable gets a dev ptr too, set by both scx200 & pc8736x _gpio drivers. heres how the pins are presented in syslog: [ 1890.176223] scx200_gpio.0: io00: 0x0044 TS OD PUE EDGE LO DEBOUNCE [ 1890.287223] scx200_gpio.0: io01: 0x0003 OE PP PUD EDGE LO nsc_gpio.c: new file is new home of several file-ops methods, which are modified to get their vtable from filp->private_data, and use it where needed. scx200_gpio.c: keeps some of its existing gpio routines, but now wires them up via the vtable (they're invoked by nsc_gpio.c:nsc_gpio_write() thru this vtable). A driver-spcific open() initializes filp->private_data with the vtable. Once the split is clean, and the scx200_gpio driver is working, we copy and modify the function and variable names, and rework the access-method bodies for the different addressing scheme. Heres a working overview of the patchset: # series file for GPIO # Spring Cleaning gpio-scx/patch.preclean # scripts/Lindent fixes, editor-ctrl comments # API Modernization gpio-scx/patch.api26 # what I learned from LDD3 gpio-scx/patch.platform-dev-2 # get pdev, support for dev_dbg() gpio-scx/patch.unsigned-minor # fix to match std practice # Debuggability gpio-scx/patch.dump-diet # shrink gpio_dump() gpio-scx/patch.viewpins # add new 'command' to call dump() gpio-scx/patch.init-refactor # pull shadow-register init to sub # Access-Abstraction (add vtable) gpio-scx/patch.access-vtable # introduce nsg_gpio_ops vtable, w dump gpio-scx/patch.vtable-calls # add & use the vtable in scx200_gpio gpio-scx/patch.nscgpio-shell # add empty driver for common-fops # move code under abstraction gpio-scx/patch.migrate-fops # move file-ops methods from scx200_gpio gpio-scx/patch.common-dump # mv scx200.c:scx200_gpio_dump() to nsc_gpio.c gpio-scx/patch.add-pc8736x-gpio # add new driver, like old, w chip adapt # gpio-scx/patch.add-DEBUG # enable all dev_dbg()s # Cleanups # finish printk -> dev_dbg() etc gpio-scx/patch.pdev-pc8736x # new drvr needs pdev too, gpio-scx/patch.devdbg-nscgpio # add device to 'vtable', use in dev_dbg() # gpio-scx/patch.pin-config-view # another 'c' 'command' # gpio-scx/quiet-getset # take out excess dbg stuff (pretty quiet now) gpio-scx/patch.shadow-current # imitate scx200_gpio's shadow regs in pc87* # post KMentors-post patches .. gpio-scx/patch.mutexes # use mutexes for config-locks gpio-scx/patch.viewpins-values # extend dump to obsolete separate 'c' cmd gpio-scx/patch.kconfig # add stuff for kbuild # TBC # combine api26 with pdev, which is just one step. # merge c&v commands to single do-all-fn # delay viewpins, dump-diet should also un-ifdef it too. diff.sys-gpio-rollup-1 This patch: Removed editor format-control comments, and used scripts/Lindent to clean up whitespace, then deleted the bogus chunks :-( Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 7 ------- drivers/char/scx200_gpio.c | 26 +++++++++----------------- include/linux/scx200.h | 7 ------- include/linux/scx200_gpio.h | 7 ------- 4 files changed, 9 insertions(+), 38 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 321f5fd26e7..4d3d8e811fe 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -159,10 +159,3 @@ EXPORT_SYMBOL(scx200_gpio_base); EXPORT_SYMBOL(scx200_gpio_shadow); EXPORT_SYMBOL(scx200_gpio_configure); EXPORT_SYMBOL(scx200_cb_base); - -/* - Local variables: - compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules" - c-basic-offset: 8 - End: -*/ diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index 664a6e97eb1..dd051ea6755 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -1,4 +1,4 @@ -/* linux/drivers/char/scx200_gpio.c +/* linux/drivers/char/scx200_gpio.c National Semiconductor SCx200 GPIO driver. Allows a user space process to play with the GPIO pins. @@ -26,7 +26,7 @@ static int major = 0; /* default to dynamic major */ module_param(major, int, 0); MODULE_PARM_DESC(major, "Major device number"); -static ssize_t scx200_gpio_write(struct file *file, const char __user *data, +static ssize_t scx200_gpio_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { unsigned m = iminor(file->f_dentry->d_inode); @@ -34,15 +34,14 @@ static ssize_t scx200_gpio_write(struct file *file, const char __user *data, for (i = 0; i < len; ++i) { char c; - if (get_user(c, data+i)) + if (get_user(c, data + i)) return -EFAULT; - switch (c) - { - case '0': - scx200_gpio_set(m, 0); + switch (c) { + case '0': + scx200_gpio_set(m, 0); break; - case '1': - scx200_gpio_set(m, 1); + case '1': + scx200_gpio_set(m, 1); break; case 'O': printk(KERN_INFO NAME ": GPIO%d output enabled\n", m); @@ -83,7 +82,7 @@ static ssize_t scx200_gpio_read(struct file *file, char __user *buf, value = scx200_gpio_get(m); if (put_user(value ? '1' : '0', buf)) return -EFAULT; - + return 1; } @@ -140,10 +139,3 @@ static void __exit scx200_gpio_cleanup(void) module_init(scx200_gpio_init); module_exit(scx200_gpio_cleanup); - -/* - Local variables: - compile-command: "make -k -C ../.. SUBDIRS=drivers/char modules" - c-basic-offset: 8 - End: -*/ diff --git a/include/linux/scx200.h b/include/linux/scx200.h index a22f9e173ad..693c0557e70 100644 --- a/include/linux/scx200.h +++ b/include/linux/scx200.h @@ -49,10 +49,3 @@ extern unsigned scx200_cb_base; #define SCx200_REV 0x3d /* Revision Register */ #define SCx200_CBA 0x3e /* Configuration Base Address Register */ #define SCx200_CBA_SCRATCH 0x64 /* Configuration Base Address Scratchpad */ - -/* - Local variables: - compile-command: "make -C ../.. bzImage modules" - c-basic-offset: 8 - End: -*/ diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h index 30cdd648ba7..83ab8a1521f 100644 --- a/include/linux/scx200_gpio.h +++ b/include/linux/scx200_gpio.h @@ -87,10 +87,3 @@ static inline void scx200_gpio_change(int index) { #undef __SCx200_GPIO_SHADOW #undef __SCx200_GPIO_INDEX #undef __SCx200_GPIO_OUT - -/* - Local variables: - compile-command: "make -C ../.. bzImage modules" - c-basic-offset: 8 - End: -*/ -- cgit v1.2.3-70-g09d2 From 55b8c0455b8aeb80f94183fa3aa42e3fa62b1705 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:15 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: device minor numbers are unsigned ints Per kernel headers, device minor numbers are unsigned ints. Do the same in this driver. Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 2 +- include/linux/scx200_gpio.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 4d3d8e811fe..18f895ce248 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -87,7 +87,7 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ return 0; } -u32 scx200_gpio_configure(int index, u32 mask, u32 bits) +u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) { u32 config, new_config; unsigned long flags; diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h index 83ab8a1521f..90dd069cc14 100644 --- a/include/linux/scx200_gpio.h +++ b/include/linux/scx200_gpio.h @@ -1,6 +1,6 @@ #include -u32 scx200_gpio_configure(int index, u32 set, u32 clear); +u32 scx200_gpio_configure(unsigned index, u32 set, u32 clear); extern unsigned scx200_gpio_base; extern long scx200_gpio_shadow[2]; @@ -17,7 +17,7 @@ extern long scx200_gpio_shadow[2]; /* returns the value of the GPIO pin */ -static inline int scx200_gpio_get(int index) { +static inline int scx200_gpio_get(unsigned index) { __SCx200_GPIO_BANK; __SCx200_GPIO_IOADDR + 0x04; __SCx200_GPIO_INDEX; @@ -29,7 +29,7 @@ static inline int scx200_gpio_get(int index) { driven if the GPIO is configured as an output, it might not be the state of the GPIO right now if the GPIO is configured as an input) */ -static inline int scx200_gpio_current(int index) { +static inline int scx200_gpio_current(unsigned index) { __SCx200_GPIO_BANK; __SCx200_GPIO_INDEX; @@ -38,7 +38,7 @@ static inline int scx200_gpio_current(int index) { /* drive the GPIO signal high */ -static inline void scx200_gpio_set_high(int index) { +static inline void scx200_gpio_set_high(unsigned index) { __SCx200_GPIO_BANK; __SCx200_GPIO_IOADDR; __SCx200_GPIO_SHADOW; @@ -49,7 +49,7 @@ static inline void scx200_gpio_set_high(int index) { /* drive the GPIO signal low */ -static inline void scx200_gpio_set_low(int index) { +static inline void scx200_gpio_set_low(unsigned index) { __SCx200_GPIO_BANK; __SCx200_GPIO_IOADDR; __SCx200_GPIO_SHADOW; @@ -60,7 +60,7 @@ static inline void scx200_gpio_set_low(int index) { /* drive the GPIO signal to state */ -static inline void scx200_gpio_set(int index, int state) { +static inline void scx200_gpio_set(unsigned index, int state) { __SCx200_GPIO_BANK; __SCx200_GPIO_IOADDR; __SCx200_GPIO_SHADOW; @@ -73,7 +73,7 @@ static inline void scx200_gpio_set(int index, int state) { } /* toggle the GPIO signal */ -static inline void scx200_gpio_change(int index) { +static inline void scx200_gpio_change(unsigned index) { __SCx200_GPIO_BANK; __SCx200_GPIO_IOADDR; __SCx200_GPIO_SHADOW; -- cgit v1.2.3-70-g09d2 From d424aa8744b7b7db1d32476ae6c8015d10eebe1c Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:16 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: put gpio_dump on a diet Shrink scx200_gpio_dump() to a single printk with ternary ops. The function is still ifdef'd out, this is corrected in next patch, when it is actually used. The patch 'inadvertently' changed loglevel from DEBUG to INFO. This is Good, because in next patch, its wired to a 'command' which the user can invoke when they want. When they do so, its because they want INFO to support their developement effort, and we want to give it to them without compiling a DEBUG version of the driver. Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 18f895ce248..9e96a785dd0 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -108,34 +108,17 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) #if 0 void scx200_gpio_dump(unsigned index) { - u32 config = scx200_gpio_configure(index, ~0, 0); - printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config); - - if (config & 1) - printk(" OE"); /* output enabled */ - else - printk(" TS"); /* tristate */ - if (config & 2) - printk(" PP"); /* push pull */ - else - printk(" OD"); /* open drain */ - if (config & 4) - printk(" PUE"); /* pull up enabled */ - else - printk(" PUD"); /* pull up disabled */ - if (config & 8) - printk(" LOCKED"); /* locked */ - if (config & 16) - printk(" LEVEL"); /* level input */ - else - printk(" EDGE"); /* edge input */ - if (config & 32) - printk(" HI"); /* trigger on rising edge */ - else - printk(" LO"); /* trigger on falling edge */ - if (config & 64) - printk(" DEBOUNCE"); /* debounce */ - printk("\n"); + u32 config = scx200_gpio_configure(index, ~0, 0); + + printk(KERN_INFO NAME ": GPIO-%02u: 0x%08lx %s %s %s %s %s %s %s\n", + index, (unsigned long) config, + (config & 1) ? "OE" : "TS", /* output enabled / tristate */ + (config & 2) ? "PP" : "OD", /* push pull / open drain */ + (config & 4) ? "PUE" : "PUD", /* pull up enabled/disabled */ + (config & 8) ? "LOCKED" : "", /* locked / unlocked */ + (config & 16) ? "LEVEL" : "EDGE", /* level/edge input */ + (config & 32) ? "HI" : "LO", /* trigger on rising/falling edge */ + (config & 64) ? "DEBOUNCE" : ""); /* debounce */ } #endif /* 0 */ -- cgit v1.2.3-70-g09d2 From 9550a339e1ab1709dd53d92a1b76eecae2df9f3c Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:16 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: add 'v' command to device-file Add a new driver command: 'v' which calls gpio_dump() on the pin. The output goes to the log, like all other INFO messages in the original driver. Giving the user control over the feedback they 'need' is construed to be a user-friendly feature, and allows us (later) to dial down many INFO messages to DEBUG log-level. Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 3 +-- drivers/char/scx200_gpio.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 9e96a785dd0..009e6aa16f7 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -105,7 +105,6 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) return config; } -#if 0 void scx200_gpio_dump(unsigned index) { u32 config = scx200_gpio_configure(index, ~0, 0); @@ -120,7 +119,6 @@ void scx200_gpio_dump(unsigned index) (config & 32) ? "HI" : "LO", /* trigger on rising/falling edge */ (config & 64) ? "DEBOUNCE" : ""); /* debounce */ } -#endif /* 0 */ static int __init scx200_init(void) { @@ -141,4 +139,5 @@ module_exit(scx200_cleanup); EXPORT_SYMBOL(scx200_gpio_base); EXPORT_SYMBOL(scx200_gpio_shadow); EXPORT_SYMBOL(scx200_gpio_configure); +EXPORT_SYMBOL(scx200_gpio_dump); EXPORT_SYMBOL(scx200_cb_base); diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index e6e52c48697..a1a56c5c8a8 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -41,6 +41,7 @@ static ssize_t scx200_gpio_write(struct file *file, const char __user *data, { unsigned m = iminor(file->f_dentry->d_inode); size_t i; + int err = 0; for (i = 0; i < len; ++i) { char c; @@ -77,8 +78,23 @@ static ssize_t scx200_gpio_write(struct file *file, const char __user *data, printk(KERN_INFO NAME ": GPIO%d pull up disabled\n", m); scx200_gpio_configure(m, ~4, 0); break; + + case 'v': + /* View Current pin settings */ + scx200_gpio_dump(m); + break; + case '\n': + /* end of settings string, do nothing */ + break; + default: + printk(KERN_ERR NAME + ": GPIO-%2d bad setting: chr<0x%2x>\n", m, + (int)c); + err++; } } + if (err) + return -EINVAL; /* full string handled, report error */ return len; } -- cgit v1.2.3-70-g09d2 From 9b170b8fdbd14a197ad26b4354f3810c65a96602 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:17 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: refactor scx200_probe to better segregate _gpio initialization Pull shadow-reg initialization into separate function now, rather than doing it 2x later (scx200, pc8736x). When we revisit 2nd drvr below, it will be to reimplement an init function, rather than another refactor. Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 009e6aa16f7..27be55349c6 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -47,9 +47,17 @@ static struct pci_driver scx200_pci_driver = { static DEFINE_SPINLOCK(scx200_gpio_config_lock); -static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +static void __devinit scx200_init_shadow(void) { int bank; + + /* read the current values driven on the GPIO signals */ + for (bank = 0; bank < 2; ++bank) + scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); +} + +static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ unsigned base; if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || @@ -63,10 +71,7 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ } scx200_gpio_base = base; - - /* read the current values driven on the GPIO signals */ - for (bank = 0; bank < 2; ++bank) - scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); + scx200_init_shadow(); } else { /* find the base of the Configuration Block */ -- cgit v1.2.3-70-g09d2 From 0e41ef3c51ea7dbb764616f60a90700647fc8518 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:20 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: migrate gpio_dump to common module Since the meaning of config-bits is the same for scx200 and pc8736x _gpios, we can share a function to deliver this to user. Since it is called via the vtable, its also completely replaceable. For now, we keep using printk... Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 16 ---------------- drivers/char/nsc_gpio.c | 16 +++++++++++++++- drivers/char/scx200_gpio.c | 4 ++-- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 27be55349c6..edead73ebef 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -110,21 +110,6 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) return config; } -void scx200_gpio_dump(unsigned index) -{ - u32 config = scx200_gpio_configure(index, ~0, 0); - - printk(KERN_INFO NAME ": GPIO-%02u: 0x%08lx %s %s %s %s %s %s %s\n", - index, (unsigned long) config, - (config & 1) ? "OE" : "TS", /* output enabled / tristate */ - (config & 2) ? "PP" : "OD", /* push pull / open drain */ - (config & 4) ? "PUE" : "PUD", /* pull up enabled/disabled */ - (config & 8) ? "LOCKED" : "", /* locked / unlocked */ - (config & 16) ? "LEVEL" : "EDGE", /* level/edge input */ - (config & 32) ? "HI" : "LO", /* trigger on rising/falling edge */ - (config & 64) ? "DEBOUNCE" : ""); /* debounce */ -} - static int __init scx200_init(void) { printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); @@ -144,5 +129,4 @@ module_exit(scx200_cleanup); EXPORT_SYMBOL(scx200_gpio_base); EXPORT_SYMBOL(scx200_gpio_shadow); EXPORT_SYMBOL(scx200_gpio_configure); -EXPORT_SYMBOL(scx200_gpio_dump); EXPORT_SYMBOL(scx200_cb_base); diff --git a/drivers/char/nsc_gpio.c b/drivers/char/nsc_gpio.c index 3842c272711..72b0a5791ed 100644 --- a/drivers/char/nsc_gpio.c +++ b/drivers/char/nsc_gpio.c @@ -19,6 +19,19 @@ #define NAME "nsc_gpio" +void nsc_gpio_dump(unsigned index, u32 config) +{ + printk(KERN_INFO NAME ": GPIO-%02u: 0x%08lx %s %s %s %s %s %s %s\n", + index, (unsigned long)config, + (config & 1) ? "OE" : "TS", /* output-enabled/tristate */ + (config & 2) ? "PP" : "OD", /* push pull / open drain */ + (config & 4) ? "PUE" : "PUD", /* pull up enabled/disabled */ + (config & 8) ? "LOCKED" : "", /* locked / unlocked */ + (config & 16) ? "LEVEL" : "EDGE",/* level/edge input */ + (config & 32) ? "HI" : "LO", /* trigger on rise/fall edge */ + (config & 64) ? "DEBOUNCE" : ""); /* debounce */ +} + ssize_t nsc_gpio_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { @@ -99,9 +112,10 @@ ssize_t nsc_gpio_read(struct file *file, char __user * buf, return 1; } -/* common routines for both scx200_gpio and pc87360_gpio */ +/* common file-ops routines for both scx200_gpio and pc87360_gpio */ EXPORT_SYMBOL(nsc_gpio_write); EXPORT_SYMBOL(nsc_gpio_read); +EXPORT_SYMBOL(nsc_gpio_dump); static int __init nsc_gpio_init(void) { diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index eb9a8477759..442367b3f5d 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -35,7 +35,7 @@ static int major = 0; /* default to dynamic major */ module_param(major, int, 0); MODULE_PARM_DESC(major, "Major device number"); -extern void scx200_gpio_dump(unsigned index); +extern void nsc_gpio_dump(unsigned index); extern ssize_t nsc_gpio_write(struct file *file, const char __user *data, size_t len, loff_t *ppos); @@ -46,7 +46,7 @@ extern ssize_t nsc_gpio_read(struct file *file, char __user *buf, struct nsc_gpio_ops scx200_access = { .owner = THIS_MODULE, .gpio_config = scx200_gpio_configure, - .gpio_dump = scx200_gpio_dump, + .gpio_dump = nsc_gpio_dump, .gpio_get = scx200_gpio_get, .gpio_set = scx200_gpio_set, .gpio_set_high = scx200_gpio_set_high, -- cgit v1.2.3-70-g09d2 From 8bcf6135c3e8cdfab375f4041a48721483519eee Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 27 Jun 2006 02:54:25 -0700 Subject: [PATCH] chardev: GPIO for SCx200 & PC-8736x: replace spinlocks w mutexes Replace spinlocks guarding gpio config ops with mutexes. This is a me-too patch, and is justifiable insofar as mutexes have stricter semantics and better debugging support, so are preferred where they are applicable. Signed-off-by: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 8 ++++---- drivers/char/pc8736x_gpio.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index edead73ebef..9bf590cefc7 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -45,7 +46,7 @@ static struct pci_driver scx200_pci_driver = { .probe = scx200_probe, }; -static DEFINE_SPINLOCK(scx200_gpio_config_lock); +static DEFINE_MUTEX(scx200_gpio_config_lock); static void __devinit scx200_init_shadow(void) { @@ -95,9 +96,8 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) { u32 config, new_config; - unsigned long flags; - spin_lock_irqsave(&scx200_gpio_config_lock, flags); + mutex_lock(&scx200_gpio_config_lock); outl(index, scx200_gpio_base + 0x20); config = inl(scx200_gpio_base + 0x24); @@ -105,7 +105,7 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) new_config = (config & mask) | bits; outl(new_config, scx200_gpio_base + 0x24); - spin_unlock_irqrestore(&scx200_gpio_config_lock, flags); + mutex_unlock(&scx200_gpio_config_lock); return config; } diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c index b8183d50833..72dcf222f14 100644 --- a/drivers/char/pc8736x_gpio.c +++ b/drivers/char/pc8736x_gpio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,7 @@ static int major; /* default to dynamic major */ module_param(major, int, 0); MODULE_PARM_DESC(major, "Major device number"); -static DEFINE_SPINLOCK(pc8736x_gpio_config_lock); +static DEFINE_MUTEX(pc8736x_gpio_config_lock); static unsigned pc8736x_gpio_base; static u8 pc8736x_gpio_shadow[4]; @@ -119,9 +120,8 @@ static inline u32 pc8736x_gpio_configure_fn(unsigned index, u32 mask, u32 bits, u32 func_slct) { u32 config, new_config; - unsigned long flags; - spin_lock_irqsave(&pc8736x_gpio_config_lock, flags); + mutex_lock(&pc8736x_gpio_config_lock); device_select(SIO_GPIO_UNIT); select_pin(index); @@ -133,7 +133,7 @@ static inline u32 pc8736x_gpio_configure_fn(unsigned index, u32 mask, u32 bits, new_config = (config & mask) | bits; superio_outb(func_slct, new_config); - spin_unlock_irqrestore(&pc8736x_gpio_config_lock, flags); + mutex_unlock(&pc8736x_gpio_config_lock); return config; } -- cgit v1.2.3-70-g09d2 From 5c45bf279d378d436ce45825c0f136696c7b6109 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Tue, 27 Jun 2006 02:54:42 -0700 Subject: [PATCH] sched: mc/smt power savings sched policy sysfs entries 'sched_mc_power_savings' and 'sched_smt_power_savings' in /sys/devices/system/cpu/ control the MC/SMT power savings policy for the scheduler. Based on the values (1-enable, 0-disable) for these controls, sched groups cpu power will be determined for different domains. When power savings policy is enabled and under light load conditions, scheduler will minimize the physical packages/cpu cores carrying the load and thus conserving power(with a perf impact based on the workload characteristics... see OLS 2005 CMP kernel scheduler paper for more details..) Signed-off-by: Suresh Siddha Cc: Ingo Molnar Cc: Nick Piggin Cc: Con Kolivas Cc: "Chen, Kenneth W" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 8 +- arch/x86_64/kernel/smpboot.c | 8 +- drivers/base/cpu.c | 10 +- include/asm-i386/topology.h | 5 + include/asm-ia64/topology.h | 1 + include/asm-powerpc/topology.h | 5 + include/asm-sparc64/topology.h | 3 + include/asm-x86_64/topology.h | 2 + include/linux/sched.h | 10 ++ include/linux/topology.h | 3 +- kernel/sched.c | 240 ++++++++++++++++++++++++++++++++++++----- 11 files changed, 262 insertions(+), 33 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index ab5275beddf..89e7315e539 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -448,10 +448,12 @@ cpumask_t cpu_coregroup_map(int cpu) struct cpuinfo_x86 *c = cpu_data + cpu; /* * For perf, we return last level cache shared map. - * TBD: when power saving sched policy is added, we will return - * cpu_core_map when power saving policy is enabled + * And for power savings, we return cpu_core_map */ - return c->llc_shared_map; + if (sched_mc_power_savings || sched_smt_power_savings) + return cpu_core_map[cpu]; + else + return c->llc_shared_map; } /* representing cpus for which sibling maps can be computed */ diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 4e9755179ec..540c0ccbccc 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -455,10 +455,12 @@ cpumask_t cpu_coregroup_map(int cpu) struct cpuinfo_x86 *c = cpu_data + cpu; /* * For perf, we return last level cache shared map. - * TBD: when power saving sched policy is added, we will return - * cpu_core_map when power saving policy is enabled + * And for power savings, we return cpu_core_map */ - return c->llc_shared_map; + if (sched_mc_power_savings || sched_smt_power_savings) + return cpu_core_map[cpu]; + else + return c->llc_shared_map; } /* representing cpus for which sibling maps can be computed */ diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3972d8ac978..4bef76a2f3f 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -143,5 +143,13 @@ EXPORT_SYMBOL_GPL(get_cpu_sysdev); int __init cpu_dev_init(void) { - return sysdev_class_register(&cpu_sysdev_class); + int err; + + err = sysdev_class_register(&cpu_sysdev_class); +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + if (!err) + err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class); +#endif + + return err; } diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index aa4185ee81f..6adbd9b1ae8 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -112,4 +112,9 @@ extern unsigned long node_remap_size[]; extern cpumask_t cpu_coregroup_map(int cpu); +#ifdef CONFIG_SMP +#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define smt_capable() (smp_num_siblings > 1) +#endif + #endif /* _ASM_I386_TOPOLOGY_H */ diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index 616b5ed2aa7..937c2125752 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -112,6 +112,7 @@ void build_cpu_to_node_map(void); #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) +#define smt_capable() (smp_num_siblings > 1) #endif #include diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 92f3e5507d2..bbc3844b086 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -93,5 +93,10 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #endif /* CONFIG_NUMA */ +#ifdef CONFIG_SMP +#include +#define smt_capable() (cpu_has_feature(CPU_FTR_SMT)) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TOPOLOGY_H */ diff --git a/include/asm-sparc64/topology.h b/include/asm-sparc64/topology.h index 0e234e201bd..98a6c613589 100644 --- a/include/asm-sparc64/topology.h +++ b/include/asm-sparc64/topology.h @@ -1,6 +1,9 @@ #ifndef _ASM_SPARC64_TOPOLOGY_H #define _ASM_SPARC64_TOPOLOGY_H +#include +#define smt_capable() (tlb_type == hypervisor) + #include #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index c4e46e7fa7b..6e7a2e976b0 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -59,6 +59,8 @@ extern int __node_distance(int, int); #define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) +#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define smt_capable() (smp_num_siblings > 1) #endif #include diff --git a/include/linux/sched.h b/include/linux/sched.h index ab8ffc54423..0bc81a151e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -570,6 +570,11 @@ enum idle_type #define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ #define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ +#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ + +#define BALANCE_FOR_POWER ((sched_mc_power_savings || sched_smt_power_savings) \ + ? SD_POWERSAVINGS_BALANCE : 0) + struct sched_group { struct sched_group *next; /* Must be a circular list */ @@ -1412,6 +1417,11 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) extern long sched_setaffinity(pid_t pid, cpumask_t new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); +#include +extern int sched_mc_power_savings, sched_smt_power_savings; +extern struct sysdev_attribute attr_sched_mc_power_savings, attr_sched_smt_power_savings; +extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); + extern void normalize_rt_tasks(void); #ifdef CONFIG_PM diff --git a/include/linux/topology.h b/include/linux/topology.h index a305ae2e44b..ec1eca85290 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -134,7 +134,8 @@ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE, \ + | SD_WAKE_AFFINE \ + | BALANCE_FOR_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff --git a/kernel/sched.c b/kernel/sched.c index 122b75584a1..54fa282657c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1162,6 +1162,11 @@ static int sched_balance_self(int cpu, int flag) struct sched_domain *tmp, *sd = NULL; for_each_domain(cpu, tmp) { + /* + * If power savings logic is enabled for a domain, stop there. + */ + if (tmp->flags & SD_POWERSAVINGS_BALANCE) + break; if (tmp->flags & flag) sd = tmp; } @@ -2082,6 +2087,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long busiest_load_per_task, busiest_nr_running; unsigned long this_load_per_task, this_nr_running; int load_idx; +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + int power_savings_balance = 1; + unsigned long leader_nr_running = 0, min_load_per_task = 0; + unsigned long min_nr_running = ULONG_MAX; + struct sched_group *group_min = NULL, *group_leader = NULL; +#endif max_load = this_load = total_load = total_pwr = 0; busiest_load_per_task = busiest_nr_running = 0; @@ -2094,7 +2105,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, load_idx = sd->idle_idx; do { - unsigned long load; + unsigned long load, group_capacity; int local_group; int i; unsigned long sum_nr_running, sum_weighted_load; @@ -2127,18 +2138,76 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, /* Adjust by relative CPU power of the group */ avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; + group_capacity = group->cpu_power / SCHED_LOAD_SCALE; + if (local_group) { this_load = avg_load; this = group; this_nr_running = sum_nr_running; this_load_per_task = sum_weighted_load; } else if (avg_load > max_load && - sum_nr_running > group->cpu_power / SCHED_LOAD_SCALE) { + sum_nr_running > group_capacity) { max_load = avg_load; busiest = group; busiest_nr_running = sum_nr_running; busiest_load_per_task = sum_weighted_load; } + +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + /* + * Busy processors will not participate in power savings + * balance. + */ + if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) + goto group_next; + + /* + * If the local group is idle or completely loaded + * no need to do power savings balance at this domain + */ + if (local_group && (this_nr_running >= group_capacity || + !this_nr_running)) + power_savings_balance = 0; + + /* + * If a group is already running at full capacity or idle, + * don't include that group in power savings calculations + */ + if (!power_savings_balance || sum_nr_running >= group_capacity + || !sum_nr_running) + goto group_next; + + /* + * Calculate the group which has the least non-idle load. + * This is the group from where we need to pick up the load + * for saving power + */ + if ((sum_nr_running < min_nr_running) || + (sum_nr_running == min_nr_running && + first_cpu(group->cpumask) < + first_cpu(group_min->cpumask))) { + group_min = group; + min_nr_running = sum_nr_running; + min_load_per_task = sum_weighted_load / + sum_nr_running; + } + + /* + * Calculate the group which is almost near its + * capacity but still has some space to pick up some load + * from other group and save more power + */ + if (sum_nr_running <= group_capacity - 1) + if (sum_nr_running > leader_nr_running || + (sum_nr_running == leader_nr_running && + first_cpu(group->cpumask) > + first_cpu(group_leader->cpumask))) { + group_leader = group; + leader_nr_running = sum_nr_running; + } + +group_next: +#endif group = group->next; } while (group != sd->groups); @@ -2247,7 +2316,16 @@ small_imbalance: return busiest; out_balanced: +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) + goto ret; + if (this == group_leader && group_leader != group_min) { + *imbalance = min_load_per_task; + return group_min; + } +ret: +#endif *imbalance = 0; return NULL; } @@ -2300,7 +2378,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, int active_balance = 0; int sd_idle = 0; - if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER) + if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && + !sched_smt_power_savings) sd_idle = 1; schedstat_inc(sd, lb_cnt[idle]); @@ -2389,7 +2468,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER) + if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + !sched_smt_power_savings) return -1; return nr_moved; @@ -2404,7 +2484,7 @@ out_one_pinned: (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) return -1; return 0; } @@ -2425,7 +2505,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, int nr_moved = 0; int sd_idle = 0; - if (sd->flags & SD_SHARE_CPUPOWER) + if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) sd_idle = 1; schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); @@ -2466,7 +2546,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, out_balanced: schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) return -1; sd->nr_balance_failed = 0; return 0; @@ -5732,6 +5812,7 @@ static cpumask_t sched_domain_node_span(int node) } #endif +int sched_smt_power_savings = 0, sched_mc_power_savings = 0; /* * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we * can switch it on easily if needed. @@ -6113,37 +6194,72 @@ static int build_sched_domains(const cpumask_t *cpu_map) #endif /* Calculate CPU power for physical packages and nodes */ +#ifdef CONFIG_SCHED_SMT for_each_cpu_mask(i, *cpu_map) { - int power; struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); - power = SCHED_LOAD_SCALE; - sd->groups->cpu_power = power; + sd->groups->cpu_power = SCHED_LOAD_SCALE; + } #endif #ifdef CONFIG_SCHED_MC + for_each_cpu_mask(i, *cpu_map) { + int power; + struct sched_domain *sd; sd = &per_cpu(core_domains, i); - power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) + if (sched_smt_power_savings) + power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); + else + power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) * SCHED_LOAD_SCALE / 10; sd->groups->cpu_power = power; + } +#endif + for_each_cpu_mask(i, *cpu_map) { + struct sched_domain *sd; +#ifdef CONFIG_SCHED_MC sd = &per_cpu(phys_domains, i); + if (i != first_cpu(sd->groups->cpumask)) + continue; - /* - * This has to be < 2 * SCHED_LOAD_SCALE - * Lets keep it SCHED_LOAD_SCALE, so that - * while calculating NUMA group's cpu_power - * we can simply do - * numa_group->cpu_power += phys_group->cpu_power; - * - * See "only add power once for each physical pkg" - * comment below - */ - sd->groups->cpu_power = SCHED_LOAD_SCALE; + sd->groups->cpu_power = 0; + if (sched_mc_power_savings || sched_smt_power_savings) { + int j; + + for_each_cpu_mask(j, sd->groups->cpumask) { + struct sched_domain *sd1; + sd1 = &per_cpu(core_domains, j); + /* + * for each core we will add once + * to the group in physical domain + */ + if (j != first_cpu(sd1->groups->cpumask)) + continue; + + if (sched_smt_power_savings) + sd->groups->cpu_power += sd1->groups->cpu_power; + else + sd->groups->cpu_power += SCHED_LOAD_SCALE; + } + } else + /* + * This has to be < 2 * SCHED_LOAD_SCALE + * Lets keep it SCHED_LOAD_SCALE, so that + * while calculating NUMA group's cpu_power + * we can simply do + * numa_group->cpu_power += phys_group->cpu_power; + * + * See "only add power once for each physical pkg" + * comment below + */ + sd->groups->cpu_power = SCHED_LOAD_SCALE; #else + int power; sd = &per_cpu(phys_domains, i); - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; + if (sched_smt_power_savings) + power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); + else + power = SCHED_LOAD_SCALE; sd->groups->cpu_power = power; #endif } @@ -6244,6 +6360,80 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) return err; } +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +int arch_reinit_sched_domains(void) +{ + int err; + + lock_cpu_hotplug(); + detach_destroy_domains(&cpu_online_map); + err = arch_init_sched_domains(&cpu_online_map); + unlock_cpu_hotplug(); + + return err; +} + +static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) +{ + int ret; + + if (buf[0] != '0' && buf[0] != '1') + return -EINVAL; + + if (smt) + sched_smt_power_savings = (buf[0] == '1'); + else + sched_mc_power_savings = (buf[0] == '1'); + + ret = arch_reinit_sched_domains(); + + return ret ? ret : count; +} + +int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +{ + int err = 0; +#ifdef CONFIG_SCHED_SMT + if (smt_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_smt_power_savings.attr); +#endif +#ifdef CONFIG_SCHED_MC + if (!err && mc_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_mc_power_savings.attr); +#endif + return err; +} +#endif + +#ifdef CONFIG_SCHED_MC +static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) +{ + return sprintf(page, "%u\n", sched_mc_power_savings); +} +static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +{ + return sched_power_savings_store(buf, count, 0); +} +SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, + sched_mc_power_savings_store); +#endif + +#ifdef CONFIG_SCHED_SMT +static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) +{ + return sprintf(page, "%u\n", sched_smt_power_savings); +} +static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +{ + return sched_power_savings_store(buf, count, 1); +} +SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, + sched_smt_power_savings_store); +#endif + + #ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. The domains -- cgit v1.2.3-70-g09d2 From f9b8404cf8f8456dfa83459510762b700dc00385 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jun 2006 02:54:49 -0700 Subject: [PATCH] pi-futex: introduce debug_check_no_locks_freed() Add debug_check_no_locks_freed(), as a central inline to add bad-lock-free-debugging functionality to. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pageattr.c | 4 ++-- include/linux/mm.h | 10 ++++++++-- mm/page_alloc.c | 4 ++-- mm/slab.c | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 0887b34bc59..353a836ed63 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -229,8 +229,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) if (PageHighMem(page)) return; if (!enable) - mutex_debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); + debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. diff --git a/include/linux/mm.h b/include/linux/mm.h index ff1fa87df8d..0ac255720f3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1030,13 +1030,19 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ +static inline void +debug_check_no_locks_freed(const void *from, unsigned long len) +{ + mutex_debug_check_no_locks_freed(from, len); +} + #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) { if (!PageHighMem(page) && !enable) - mutex_debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); + debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); } #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dafd12ec7a0..084a2de7e52 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -446,8 +446,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) arch_free_page(page, order); if (!PageHighMem(page)) - mutex_debug_check_no_locks_freed(page_address(page), - PAGE_SIZE< Date: Tue, 27 Jun 2006 02:55:09 -0700 Subject: [PATCH] do_IRQ() warning fix arch/i386/kernel/irq.c: In function 'do_IRQ': arch/i386/kernel/irq.c:104: warning: suggest parentheses around arithmetic in operand of | Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 586b15f6741..c703bc7b088 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -100,8 +100,8 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) * softirq checks work in the hardirq context. */ irqctx->tinfo.preempt_count = - irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK | - curctx->tinfo.preempt_count & SOFTIRQ_MASK; + (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); asm volatile( " xchgl %%ebx,%%esp \n" -- cgit v1.2.3-70-g09d2 From 3c101cf024c3f126e174bf0d8946c587127e3c30 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 26 Jun 2006 21:33:09 -0500 Subject: [PATCH] voyager: add cpu_present_map Voyager stopped booting some time in the 2.6.16-2.6.17 timeframe; the reason was that it doesn't have a cpu_present_map, so add one. Signed-off-by: James Bottomley Signed-off-by: Linus Torvalds --- arch/i386/mach-voyager/voyager_smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 70e560a1b79..8242af9ebc6 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -661,6 +661,7 @@ do_boot_cpu(__u8 cpu) print_cpu_info(&cpu_data[cpu]); wmb(); cpu_set(cpu, cpu_callout_map); + cpu_set(cpu, cpu_present_map); } else { printk("CPU%d FAILED TO BOOT: ", cpu); @@ -1912,6 +1913,7 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); cpu_set(smp_processor_id(), cpu_possible_map); + cpu_set(smp_processor_id(), cpu_present_map); } int __devinit -- cgit v1.2.3-70-g09d2 From 79bc79b07c9c6f8ae9290704e9e503a9327fcbb2 Mon Sep 17 00:00:00 2001 From: "pageexec@freemail.hu" Date: Wed, 28 Jun 2006 20:44:16 +0200 Subject: [PATCH] small fix for not releasing the mmap semaphore in i386/arch_setup_additional_pages the VDSO randomization code on i386 fails to release the mmap semaphore if insert_vm_struct() fails. [ Made the conditional unlikely. -- Linus ] Signed-off-by: Linus Torvalds --- arch/i386/kernel/sysenter.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index c60419dee01..713ba39d32c 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -148,8 +148,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) vma->vm_mm = mm; ret = insert_vm_struct(mm, vma); - if (ret) - goto free_vma; + if (unlikely(ret)) { + kmem_cache_free(vm_area_cachep, vma); + goto up_fail; + } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = @@ -158,10 +160,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) up_fail: up_write(&mm->mmap_sem); return ret; - -free_vma: - kmem_cache_free(vm_area_cachep, vma); - return ret; } const char *arch_vma_name(struct vm_area_struct *vma) -- cgit v1.2.3-70-g09d2 From a052b68b1e7a31f1e6a721290035e9deb0f6fed9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 28 Jun 2006 04:26:43 -0700 Subject: [PATCH] x86: do_IRQ(): check irq number We recently changed x86 to handle more than 256 IRQs. Add a check in do_IRQ() just to make sure that nothing went wrong with that implementation. [chrisw@sous-sol.org: do x86_64 too] Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Rusty Russell Cc: Andi Kleen Cc: Chris Wright Cc: "Protasevich, Natalie" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/irq.c | 6 ++++++ arch/x86_64/kernel/irq.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index c703bc7b088..9eec9435318 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -60,6 +60,12 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) u32 *isp; #endif + if (unlikely((unsigned)irq >= NR_IRQS)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", + __FUNCTION__, irq); + BUG(); + } + irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 3be0a7e4bf0..bfa82f52a5c 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -118,6 +118,12 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) /* high bit used in ret_from_ code */ unsigned irq = ~regs->orig_rax; + if (unlikely(irq >= NR_IRQS)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", + __FUNCTION__, irq); + BUG(); + } + exit_idle(); irq_enter(); #ifdef CONFIG_DEBUG_STACKOVERFLOW -- cgit v1.2.3-70-g09d2 From 0686cd8fbe3e5fb1441ae84b9cbc813f9297b879 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 29 Jun 2006 02:24:25 -0700 Subject: [PATCH] fix sgivwfb compile drivers/built-in.o: In function `sgivwfb_set_par': sgivwfb.c:(.text+0x88583): undefined reference to `sgivwfb_mem_phys' sgivwfb.c:(.text+0x88596): undefined reference to `sgivwfb_mem_phys' sgivwfb.c:(.text+0x885a8): undefined reference to `sgivwfb_mem_phys' drivers/built-in.o: In function `sgivwfb_check_var': sgivwfb.c:(.text+0x88ad0): undefined reference to `sgivwfb_mem_size' drivers/built-in.o: In function `sgivwfb_mmap': sgivwfb.c:(.text+0x88c75): undefined reference to `sgivwfb_mem_size' sgivwfb.c:(.text+0x88c7f): undefined reference to `sgivwfb_mem_phys' drivers/built-in.o: In function `sgivwfb_probe': sgivwfb.c:(.init.text+0x4060): undefined reference to `sgivwfb_mem_size' sgivwfb.c:(.init.text+0x4065): undefined reference to `sgivwfb_mem_phys' sgivwfb.c:(.init.text+0x4076): undefined reference to `sgivwfb_mem_phys' sgivwfb.c:(.init.text+0x409c): undefined reference to `sgivwfb_mem_size' sgivwfb.c:(.init.text+0x410e): undefined reference to `sgivwfb_mem_size' sgivwfb.c:(.init.text+0x4113): undefined reference to `sgivwfb_mem_phys' sgivwfb.c:(.init.text+0x4162): undefined reference to `sgivwfb_mem_size' sgivwfb.c:(.init.text+0x4168): undefined reference to `sgivwfb_mem_phys' make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mach-visws/setup.c | 4 ++-- drivers/video/sgivwfb.c | 6 ++---- include/asm-i386/mach-visws/setup_arch.h | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c index 8a9e1a6f745..11e83a0e1d6 100644 --- a/arch/i386/mach-visws/setup.c +++ b/arch/i386/mach-visws/setup.c @@ -140,8 +140,8 @@ void __init time_init_hook(void) #define MB (1024 * 1024) -static unsigned long sgivwfb_mem_phys; -static unsigned long sgivwfb_mem_size; +unsigned long sgivwfb_mem_phys; +unsigned long sgivwfb_mem_size; long long mem_size __initdata = 0; diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c index 2e6df1fcb2b..c0cc5e3ba7b 100644 --- a/drivers/video/sgivwfb.c +++ b/drivers/video/sgivwfb.c @@ -23,6 +23,8 @@ #include #include +#include + #define INCLUDE_TIMING_TABLE_DATA #define DBE_REG_BASE par->regs #include