From 4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 24 Apr 2009 17:30:20 -0700 Subject: Intel IOMMU Pass Through Support The patch adds kernel parameter intel_iommu=pt to set up pass through mode in context mapping entry. This disables DMAR in linux kernel; but KVM still runs on VT-d and interrupt remapping still works. In this mode, kernel uses swiotlb for DMA API functions but other VT-d functionalities are enabled for KVM. KVM always uses multi level translation page table in VT-d. By default, pass though mode is disabled in kernel. This is useful when people don't want to enable VT-d DMAR in kernel but still want to use KVM and interrupt remapping for reasons like DMAR performance concern or debug purpose. Signed-off-by: Fenghua Yu Acked-by: Weidong Han Signed-off-by: David Woodhouse --- arch/x86/include/asm/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b..fd6d21bbee6 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) -- cgit v1.2.3-70-g09d2 From 1f82de10d6b1d845155363c895c552e61b36b51a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 23 Apr 2009 20:48:32 -0700 Subject: PCI/x86: don't assume prefetchable ranges are 64bit We should not assign 64bit ranges to PCI devices that only take 32bit prefetchable addresses. Try to set IORESOURCE_MEM_64 in 64bit resource of pci_device/pci_bridge and make the bus resource only have that bit set when all devices under it support 64bit prefetchable memory. Use that flag to allocate resources from that range. Reported-by: Yannick Reviewed-by: Ivan Kokshaysky Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci.h | 1 + drivers/pci/bus.c | 7 ++++++- drivers/pci/probe.c | 9 ++++++-- drivers/pci/setup-bus.c | 52 +++++++++++++++++++++++++++++++++++++--------- include/linux/ioport.h | 2 ++ include/linux/pci.h | 4 ++++ 6 files changed, 62 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b51a1e8b0ba..927958d13c1 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void); /* generic pci stuff */ #include +#define PCIBIOS_MAX_MEM_32 0xffffffff #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 97a8194063b..40af27f3104 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -41,9 +41,14 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, void *alignf_data) { int i, ret = -ENOMEM; + resource_size_t max = -1; type_mask |= IORESOURCE_IO | IORESOURCE_MEM; + /* don't allocate too high if the pref mem doesn't support 64bit*/ + if (!(res->flags & IORESOURCE_MEM_64)) + max = PCIBIOS_MAX_MEM_32; + for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) { struct resource *r = bus->resource[i]; if (!r) @@ -62,7 +67,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, /* Ok, try it out.. */ ret = allocate_resource(r, res, size, r->start ? : min, - -1, align, + max, align, alignf, alignf_data); if (ret == 0) break; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f1ae2475fff..b962326e3d9 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -193,7 +193,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; if (type == pci_bar_io) { l &= PCI_BASE_ADDRESS_IO_MASK; - mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff; + mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT; } else { l &= PCI_BASE_ADDRESS_MEM_MASK; mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; @@ -237,6 +237,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, dev_printk(KERN_DEBUG, &dev->dev, "reg %x 64bit mmio: %pR\n", pos, res); } + + res->flags |= IORESOURCE_MEM_64; } else { sz = pci_size(l, sz, mask); @@ -362,7 +364,10 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) } } if (base <= limit) { - res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; + res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | + IORESOURCE_MEM | IORESOURCE_PREFETCH; + if (res->flags & PCI_PREF_RANGE_TYPE_64) + res->flags |= IORESOURCE_MEM_64; res->start = base; res->end = limit + 0xfffff; dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n", diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index a00f85471b6..e1c360a5b0d 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -143,6 +143,7 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_dev *bridge = bus->self; struct pci_bus_region region; u32 l, bu, lu, io_upper16; + int pref_mem64; if (pci_is_enabled(bridge)) return; @@ -198,16 +199,22 @@ static void pci_setup_bridge(struct pci_bus *bus) pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0); /* Set up PREF base/limit. */ + pref_mem64 = 0; bu = lu = 0; pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); if (bus->resource[2]->flags & IORESOURCE_PREFETCH) { + int width = 8; l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - bu = upper_32_bits(region.start); - lu = upper_32_bits(region.end); - dev_info(&bridge->dev, " PREFETCH window: %#016llx-%#016llx\n", - (unsigned long long)region.start, - (unsigned long long)region.end); + if (bus->resource[2]->flags & IORESOURCE_MEM_64) { + pref_mem64 = 1; + bu = upper_32_bits(region.start); + lu = upper_32_bits(region.end); + width = 16; + } + dev_info(&bridge->dev, " PREFETCH window: %#0*llx-%#0*llx\n", + width, (unsigned long long)region.start, + width, (unsigned long long)region.end); } else { l = 0x0000fff0; @@ -215,9 +222,11 @@ static void pci_setup_bridge(struct pci_bus *bus) } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); - /* Set the upper 32 bits of PREF base & limit. */ - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); - pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); + if (pref_mem64) { + /* Set the upper 32 bits of PREF base & limit. */ + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu); + pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu); + } pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } @@ -255,8 +264,25 @@ static void pci_bridge_check_ranges(struct pci_bus *bus) pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); } - if (pmem) + if (pmem) { b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; + if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) + b_res[2].flags |= IORESOURCE_MEM_64; + } + + /* double check if bridge does support 64 bit pref */ + if (b_res[2].flags & IORESOURCE_MEM_64) { + u32 mem_base_hi, tmp; + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, + &mem_base_hi); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + 0xffffffff); + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); + if (!tmp) + b_res[2].flags &= ~IORESOURCE_MEM_64; + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + mem_base_hi); + } } /* Helper function for sizing routines: find first available @@ -336,6 +362,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */ int order, max_order; struct resource *b_res = find_free_bus_resource(bus, type); + unsigned int mem64_mask = 0; if (!b_res) return 0; @@ -344,9 +371,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long max_order = 0; size = 0; + mem64_mask = b_res->flags & IORESOURCE_MEM_64; + b_res->flags &= ~IORESOURCE_MEM_64; + list_for_each_entry(dev, &bus->devices, bus_list) { int i; - + for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *r = &dev->resource[i]; resource_size_t r_size; @@ -372,6 +402,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long aligns[order] += align; if (order > max_order) max_order = order; + mem64_mask &= r->flags & IORESOURCE_MEM_64; } } @@ -396,6 +427,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long b_res->start = min_align; b_res->end = size + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN; + b_res->flags |= mem64_mask; return 1; } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 32e4b2f7229..786e7b8cece 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,8 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_MEM_64 0x00100000 + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 diff --git a/include/linux/pci.h b/include/linux/pci.h index 72698d89e76..6dfa47d25ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1097,6 +1097,10 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, #include +#ifndef PCIBIOS_MAX_MEM_32 +#define PCIBIOS_MAX_MEM_32 (-1) +#endif + /* these helpers provide future and backwards compatibility * for accessing popular PCI BAR info */ #define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start) -- cgit v1.2.3-70-g09d2 From 9e9f46c44e487af0a82eb61b624553e2f7118f5b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 11 Jun 2009 10:58:28 -0700 Subject: PCI: use ACPI _CRS data by default At this point, it seems to solve more problems than it causes, so let's try using it by default. It's an easy revert if it ends up causing trouble. Reviewed-by: Yinghai Lu Acked-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 2 +- arch/x86/include/asm/pci_x86.h | 2 +- arch/x86/pci/acpi.c | 2 +- arch/x86/pci/amd_bus.c | 2 +- arch/x86/pci/common.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fd5cac01303..7bdaf508040 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1787,7 +1787,7 @@ and is between 256 and 4096 characters. It is defined in the file IRQ routing is enabled. noacpi [X86] Do not use ACPI for IRQ routing or for PCI scanning. - use_crs [X86] Use _CRS for PCI resource + nocrs [X86] Don't use _CRS for PCI resource allocation. routeirq Do IRQ routing for all PCI devices. This is normally done in pci_enable_device(), diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e60fd3e14bd..cb739cc0a08 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -25,7 +25,7 @@ #define PCI_BIOS_IRQ_SCAN 0x2000 #define PCI_ASSIGN_ALL_BUSSES 0x4000 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 -#define PCI_USE__CRS 0x10000 +#define PCI_NO_ROOT_CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 #define PCI_HAS_IO_ECS 0x40000 #define PCI_NOASSIGN_ROMS 0x80000 diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index c0ecf250fe5..8d898e0d360 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -217,7 +217,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif } - if (bus && (pci_probe & PCI_USE__CRS)) + if (bus && !(pci_probe & PCI_NO_ROOT_CRS)) get_current_resources(device, busnum, domain, bus); return bus; } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index f893d6a6e80..2255f880678 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -101,7 +101,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) struct pci_root_info *info; /* don't go for it if _CRS is used */ - if (pci_probe & PCI_USE__CRS) + if (!(pci_probe & PCI_NO_ROOT_CRS)) return; /* if only one root bus, don't need to anything */ diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2202b6257b8..4740119e4bb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -515,8 +515,8 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; - } else if (!strcmp(str, "use_crs")) { - pci_probe |= PCI_USE__CRS; + } else if (!strcmp(str, "nocrs")) { + pci_probe |= PCI_NO_ROOT_CRS; return NULL; } else if (!strcmp(str, "earlydump")) { pci_early_dump_regs = 1; -- cgit v1.2.3-70-g09d2 From c4bf2f372db09ef8d16a25a60d523bfa1c50f7b5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 11 Jun 2009 23:53:55 -0400 Subject: ACPI, PCI, x86: move MCFG parsing routine from ACPI to PCI file Move arch/x86/kernel/acpi/boot.c: acpi_parse_mcfg() to arch/x86/pci/mmconfig-shared.c: pci_parse_mcfg() where it is used, and make it static. Move associated globals and helper routine with it. No functional change. This code move is in preparation for SFI support, which will allow the PCI code to find the MCFG table on systems which do not support ACPI. Signed-off-by: Len Brown Acked-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 3 ++ arch/x86/kernel/acpi/boot.c | 66 ------------------------------------------ arch/x86/pci/mmconfig-shared.c | 65 ++++++++++++++++++++++++++++++++++++++++- include/linux/acpi.h | 3 -- 4 files changed, 67 insertions(+), 70 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e60fd3e14bd..b399988eee3 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -121,6 +121,9 @@ extern int __init pcibios_init(void); extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); +extern struct acpi_mcfg_allocation *pci_mmcfg_config; +extern int pci_mmcfg_config_num; + /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space * on their northbrige except through the * %eax register. As such, you MUST diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 817d6a5e115..f54e0e557cd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -117,72 +117,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size) early_iounmap(map, size); } -#ifdef CONFIG_PCI_MMCONFIG - -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; -int pci_mmcfg_config_num; - -static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) -{ - if (!strcmp(mcfg->header.oem_id, "SGI")) - acpi_mcfg_64bit_base_addr = TRUE; - - return 0; -} - -int __init acpi_parse_mcfg(struct acpi_table_header *header) -{ - struct acpi_table_mcfg *mcfg; - unsigned long i; - int config_size; - - if (!header) - return -EINVAL; - - mcfg = (struct acpi_table_mcfg *)header; - - /* how many config structures do we have */ - pci_mmcfg_config_num = 0; - i = header->length - sizeof(struct acpi_table_mcfg); - while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; - i -= sizeof(struct acpi_mcfg_allocation); - }; - if (pci_mmcfg_config_num == 0) { - printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); - return -ENODEV; - } - - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - - acpi_mcfg_oem_check(mcfg); - - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && - !acpi_mcfg_64bit_base_addr) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; - return -ENODEV; - } - } - - return 0; -} -#endif /* CONFIG_PCI_MMCONFIG */ - #ifdef CONFIG_X86_LOCAL_APIC static int __init acpi_parse_madt(struct acpi_table_header *table) { diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8766b0e216c..712443ec6d4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -523,6 +523,69 @@ reject: static int __initdata known_bridge; +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + +/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ +struct acpi_mcfg_allocation *pci_mmcfg_config; +int pci_mmcfg_config_num; + +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +{ + if (!strcmp(mcfg->header.oem_id, "SGI")) + acpi_mcfg_64bit_base_addr = TRUE; + + return 0; +} + +static int __init pci_parse_mcfg(struct acpi_table_header *header) +{ + struct acpi_table_mcfg *mcfg; + unsigned long i; + int config_size; + + if (!header) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *)header; + + /* how many config structures do we have */ + pci_mmcfg_config_num = 0; + i = header->length - sizeof(struct acpi_table_mcfg); + while (i >= sizeof(struct acpi_mcfg_allocation)) { + ++pci_mmcfg_config_num; + i -= sizeof(struct acpi_mcfg_allocation); + }; + if (pci_mmcfg_config_num == 0) { + printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); + return -ENODEV; + } + + config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); + pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); + if (!pci_mmcfg_config) { + printk(KERN_WARNING PREFIX + "No memory for MCFG config tables\n"); + return -ENOMEM; + } + + memcpy(pci_mmcfg_config, &mcfg[1], config_size); + + acpi_mcfg_oem_check(mcfg); + + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && + !acpi_mcfg_64bit_base_addr) { + printk(KERN_ERR PREFIX + "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config_num = 0; + return -ENODEV; + } + } + + return 0; +} + static void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ @@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early) } if (!known_bridge) - acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); pci_mmcfg_reject_broken(early); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890ee3c..73cb141150d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -113,9 +113,6 @@ void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; -extern struct acpi_mcfg_allocation *pci_mmcfg_config; -extern int pci_mmcfg_config_num; - extern int sbf_port; extern unsigned long acpi_realmode_flags; -- cgit v1.2.3-70-g09d2 From ab46feae865c5b96dbf5e261be8638165932bfb1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 12 Jun 2009 20:47:50 -0400 Subject: ACPI: #define acpi_disabled 1 for CONFIG_ACPI=n SFI will need to test acpi_disabled no matter the value of CONFIG_ACPI. Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc50090..20d1465a2ab 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ +#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } -- cgit v1.2.3-70-g09d2 From dfec072ecd35ba6ecad2d51dde325253ac9a2936 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 4 Apr 2008 00:51:41 +0200 Subject: kmemcheck: add the kmemcheck core General description: kmemcheck is a patch to the linux kernel that detects use of uninitialized memory. It does this by trapping every read and write to memory that was allocated dynamically (e.g. using kmalloc()). If a memory address is read that has not previously been written to, a message is printed to the kernel log. Thanks to Andi Kleen for the set_memory_4k() solution. Andrew Morton suggested documenting the shadow member of struct page. Signed-off-by: Vegard Nossum Signed-off-by: Pekka Enberg [export kmemcheck_mark_initialized] [build fix for setup_max_cpus] Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/Makefile | 5 + arch/x86/include/asm/kmemcheck.h | 42 +++ arch/x86/include/asm/pgtable.h | 9 + arch/x86/include/asm/pgtable_types.h | 4 +- arch/x86/mm/Makefile | 2 + arch/x86/mm/kmemcheck/Makefile | 1 + arch/x86/mm/kmemcheck/error.c | 229 ++++++++++++ arch/x86/mm/kmemcheck/error.h | 15 + arch/x86/mm/kmemcheck/kmemcheck.c | 650 +++++++++++++++++++++++++++++++++++ arch/x86/mm/kmemcheck/opcode.c | 101 ++++++ arch/x86/mm/kmemcheck/opcode.h | 9 + arch/x86/mm/kmemcheck/pte.c | 22 ++ arch/x86/mm/kmemcheck/pte.h | 10 + arch/x86/mm/kmemcheck/shadow.c | 153 +++++++++ arch/x86/mm/kmemcheck/shadow.h | 16 + include/linux/kmemcheck.h | 17 + include/linux/mm_types.h | 8 + init/main.c | 1 + kernel/sysctl.c | 12 + 19 files changed, 1304 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/kmemcheck.h create mode 100644 arch/x86/mm/kmemcheck/Makefile create mode 100644 arch/x86/mm/kmemcheck/error.c create mode 100644 arch/x86/mm/kmemcheck/error.h create mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c create mode 100644 arch/x86/mm/kmemcheck/opcode.c create mode 100644 arch/x86/mm/kmemcheck/opcode.h create mode 100644 arch/x86/mm/kmemcheck/pte.c create mode 100644 arch/x86/mm/kmemcheck/pte.h create mode 100644 arch/x86/mm/kmemcheck/shadow.c create mode 100644 arch/x86/mm/kmemcheck/shadow.h create mode 100644 include/linux/kmemcheck.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index edbd0ca6206..1b68659c41b 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR endif endif +# Don't unroll struct assignments with kmemcheck enabled +ifeq ($(CONFIG_KMEMCHECK),y) + KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h new file mode 100644 index 00000000000..ed01518f297 --- /dev/null +++ b/arch/x86/include/asm/kmemcheck.h @@ -0,0 +1,42 @@ +#ifndef ASM_X86_KMEMCHECK_H +#define ASM_X86_KMEMCHECK_H + +#include +#include + +#ifdef CONFIG_KMEMCHECK +bool kmemcheck_active(struct pt_regs *regs); + +void kmemcheck_show(struct pt_regs *regs); +void kmemcheck_hide(struct pt_regs *regs); + +bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code); +bool kmemcheck_trap(struct pt_regs *regs); +#else +static inline bool kmemcheck_active(struct pt_regs *regs) +{ + return false; +} + +static inline void kmemcheck_show(struct pt_regs *regs) +{ +} + +static inline void kmemcheck_hide(struct pt_regs *regs) +{ +} + +static inline bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) +{ + return false; +} + +static inline bool kmemcheck_trap(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_KMEMCHECK */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18ef7ebf263..c5a08079ad5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -317,6 +317,15 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +static inline int pte_hidden(pte_t x) +{ +#ifdef CONFIG_KMEMCHECK + return pte_flags(x) & _PAGE_HIDDEN; +#else + return 0; +#endif +} + static inline int pmd_present(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_PRESENT; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4d258ad76a0..9b5c92140aa 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -18,7 +18,7 @@ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ -#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 @@ -41,7 +41,7 @@ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) -#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab5..eefdeee8a87 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_KMEMCHECK) += kmemcheck/ + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 00000000000..4666b7a778b --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile @@ -0,0 +1 @@ +obj-y := error.o kmemcheck.o opcode.o pte.o shadow.o diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 00000000000..5ec9f5a93f4 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "error.h" +#include "shadow.h" + +enum kmemcheck_error_type { + KMEMCHECK_ERROR_INVALID_ACCESS, + KMEMCHECK_ERROR_BUG, +}; + +#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) + +struct kmemcheck_error { + enum kmemcheck_error_type type; + + union { + /* KMEMCHECK_ERROR_INVALID_ACCESS */ + struct { + /* Kind of access that caused the error */ + enum kmemcheck_shadow state; + /* Address and size of the erroneous read */ + unsigned long address; + unsigned int size; + }; + }; + + struct pt_regs regs; + struct stack_trace trace; + unsigned long trace_entries[32]; + + /* We compress it to a char. */ + unsigned char shadow_copy[SHADOW_COPY_SIZE]; + unsigned char memory_copy[SHADOW_COPY_SIZE]; +}; + +/* + * Create a ring queue of errors to output. We can't call printk() directly + * from the kmemcheck traps, since this may call the console drivers and + * result in a recursive fault. + */ +static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; +static unsigned int error_count; +static unsigned int error_rd; +static unsigned int error_wr; +static unsigned int error_missed_count; + +static struct kmemcheck_error *error_next_wr(void) +{ + struct kmemcheck_error *e; + + if (error_count == ARRAY_SIZE(error_fifo)) { + ++error_missed_count; + return NULL; + } + + e = &error_fifo[error_wr]; + if (++error_wr == ARRAY_SIZE(error_fifo)) + error_wr = 0; + ++error_count; + return e; +} + +static struct kmemcheck_error *error_next_rd(void) +{ + struct kmemcheck_error *e; + + if (error_count == 0) + return NULL; + + e = &error_fifo[error_rd]; + if (++error_rd == ARRAY_SIZE(error_fifo)) + error_rd = 0; + --error_count; + return e; +} + +static void do_wakeup(unsigned long); +static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); + +/* + * Save the context of an error report. + */ +void kmemcheck_error_save(enum kmemcheck_shadow state, + unsigned long address, unsigned int size, struct pt_regs *regs) +{ + static unsigned long prev_ip; + + struct kmemcheck_error *e; + void *shadow_copy; + void *memory_copy; + + /* Don't report several adjacent errors from the same EIP. */ + if (regs->ip == prev_ip) + return; + prev_ip = regs->ip; + + e = error_next_wr(); + if (!e) + return; + + e->type = KMEMCHECK_ERROR_INVALID_ACCESS; + + e->state = state; + e->address = address; + e->size = size; + + /* Save regs */ + memcpy(&e->regs, regs, sizeof(*regs)); + + /* Save stack trace */ + e->trace.nr_entries = 0; + e->trace.entries = e->trace_entries; + e->trace.max_entries = ARRAY_SIZE(e->trace_entries); + e->trace.skip = 0; + save_stack_trace_bp(&e->trace, regs->bp); + + /* Round address down to nearest 16 bytes */ + shadow_copy = kmemcheck_shadow_lookup(address + & ~(SHADOW_COPY_SIZE - 1)); + BUG_ON(!shadow_copy); + + memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); + + kmemcheck_show_addr(address); + memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); + memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); + kmemcheck_hide_addr(address); + + tasklet_hi_schedule_first(&kmemcheck_tasklet); +} + +/* + * Save the context of a kmemcheck bug. + */ +void kmemcheck_error_save_bug(struct pt_regs *regs) +{ + struct kmemcheck_error *e; + + e = error_next_wr(); + if (!e) + return; + + e->type = KMEMCHECK_ERROR_BUG; + + memcpy(&e->regs, regs, sizeof(*regs)); + + e->trace.nr_entries = 0; + e->trace.entries = e->trace_entries; + e->trace.max_entries = ARRAY_SIZE(e->trace_entries); + e->trace.skip = 1; + save_stack_trace(&e->trace); + + tasklet_hi_schedule_first(&kmemcheck_tasklet); +} + +void kmemcheck_error_recall(void) +{ + static const char *desc[] = { + [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", + [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", + [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", + [KMEMCHECK_SHADOW_FREED] = "freed", + }; + + static const char short_desc[] = { + [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', + [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', + [KMEMCHECK_SHADOW_INITIALIZED] = 'i', + [KMEMCHECK_SHADOW_FREED] = 'f', + }; + + struct kmemcheck_error *e; + unsigned int i; + + e = error_next_rd(); + if (!e) + return; + + switch (e->type) { + case KMEMCHECK_ERROR_INVALID_ACCESS: + printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " + "from %s memory (%p)\n", + 8 * e->size, e->state < ARRAY_SIZE(desc) ? + desc[e->state] : "(invalid shadow state)", + (void *) e->address); + + printk(KERN_INFO); + for (i = 0; i < SHADOW_COPY_SIZE; ++i) + printk("%02x", e->memory_copy[i]); + printk("\n"); + + printk(KERN_INFO); + for (i = 0; i < SHADOW_COPY_SIZE; ++i) { + if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) + printk(" %c", short_desc[e->shadow_copy[i]]); + else + printk(" ?"); + } + printk("\n"); + printk(KERN_INFO "%*c\n", 2 + 2 + * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); + break; + case KMEMCHECK_ERROR_BUG: + printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); + break; + } + + __show_regs(&e->regs, 1); + print_stack_trace(&e->trace, 0); +} + +static void do_wakeup(unsigned long data) +{ + while (error_count > 0) + kmemcheck_error_recall(); + + if (error_missed_count > 0) { + printk(KERN_WARNING "kmemcheck: Lost %d error reports because " + "the queue was too small\n", error_missed_count); + error_missed_count = 0; + } +} diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 00000000000..0efc2e8d0a2 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h @@ -0,0 +1,15 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H +#define ARCH__X86__MM__KMEMCHECK__ERROR_H + +#include + +#include "shadow.h" + +void kmemcheck_error_save(enum kmemcheck_shadow state, + unsigned long address, unsigned int size, struct pt_regs *regs); + +void kmemcheck_error_save_bug(struct pt_regs *regs); + +void kmemcheck_error_recall(void); + +#endif diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 00000000000..9de7d8f6b6e --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -0,0 +1,650 @@ +/** + * kmemcheck - a heavyweight memory checker for the linux kernel + * Copyright (C) 2007, 2008 Vegard Nossum + * (With a lot of help from Ingo Molnar and Pekka Enberg.) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2) as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "error.h" +#include "opcode.h" +#include "pte.h" +#include "shadow.h" + +#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT +# define KMEMCHECK_ENABLED 0 +#endif + +#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT +# define KMEMCHECK_ENABLED 1 +#endif + +#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT +# define KMEMCHECK_ENABLED 2 +#endif + +int kmemcheck_enabled = KMEMCHECK_ENABLED; + +int __init kmemcheck_init(void) +{ + printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n"); + +#ifdef CONFIG_SMP + /* + * Limit SMP to use a single CPU. We rely on the fact that this code + * runs before SMP is set up. + */ + if (setup_max_cpus > 1) { + printk(KERN_INFO + "kmemcheck: Limiting number of CPUs to 1.\n"); + setup_max_cpus = 1; + } +#endif + + return 0; +} + +early_initcall(kmemcheck_init); + +#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT +int kmemcheck_enabled = 0; +#endif + +#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT +int kmemcheck_enabled = 1; +#endif + +#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT +int kmemcheck_enabled = 2; +#endif + +/* + * We need to parse the kmemcheck= option before any memory is allocated. + */ +static int __init param_kmemcheck(char *str) +{ + if (!str) + return -EINVAL; + + sscanf(str, "%d", &kmemcheck_enabled); + return 0; +} + +early_param("kmemcheck", param_kmemcheck); + +int kmemcheck_show_addr(unsigned long address) +{ + pte_t *pte; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return 0; + + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + __flush_tlb_one(address); + return 1; +} + +int kmemcheck_hide_addr(unsigned long address) +{ + pte_t *pte; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return 0; + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + __flush_tlb_one(address); + return 1; +} + +struct kmemcheck_context { + bool busy; + int balance; + + /* + * There can be at most two memory operands to an instruction, but + * each address can cross a page boundary -- so we may need up to + * four addresses that must be hidden/revealed for each fault. + */ + unsigned long addr[4]; + unsigned long n_addrs; + unsigned long flags; + + /* Data size of the instruction that caused a fault. */ + unsigned int size; +}; + +static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); + +bool kmemcheck_active(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + return data->balance > 0; +} + +/* Save an address that needs to be shown/hidden */ +static void kmemcheck_save_addr(unsigned long addr) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); + data->addr[data->n_addrs++] = addr; +} + +static unsigned int kmemcheck_show_all(void) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + unsigned int i; + unsigned int n; + + n = 0; + for (i = 0; i < data->n_addrs; ++i) + n += kmemcheck_show_addr(data->addr[i]); + + return n; +} + +static unsigned int kmemcheck_hide_all(void) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + unsigned int i; + unsigned int n; + + n = 0; + for (i = 0; i < data->n_addrs; ++i) + n += kmemcheck_hide_addr(data->addr[i]); + + return n; +} + +/* + * Called from the #PF handler. + */ +void kmemcheck_show(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + BUG_ON(!irqs_disabled()); + + if (unlikely(data->balance != 0)) { + kmemcheck_show_all(); + kmemcheck_error_save_bug(regs); + data->balance = 0; + return; + } + + /* + * None of the addresses actually belonged to kmemcheck. Note that + * this is not an error. + */ + if (kmemcheck_show_all() == 0) + return; + + ++data->balance; + + /* + * The IF needs to be cleared as well, so that the faulting + * instruction can run "uninterrupted". Otherwise, we might take + * an interrupt and start executing that before we've had a chance + * to hide the page again. + * + * NOTE: In the rare case of multiple faults, we must not override + * the original flags: + */ + if (!(regs->flags & X86_EFLAGS_TF)) + data->flags = regs->flags; + + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; +} + +/* + * Called from the #DB handler. + */ +void kmemcheck_hide(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + int n; + + BUG_ON(!irqs_disabled()); + + if (data->balance == 0) + return; + + if (unlikely(data->balance != 1)) { + kmemcheck_show_all(); + kmemcheck_error_save_bug(regs); + data->n_addrs = 0; + data->balance = 0; + + if (!(data->flags & X86_EFLAGS_TF)) + regs->flags &= ~X86_EFLAGS_TF; + if (data->flags & X86_EFLAGS_IF) + regs->flags |= X86_EFLAGS_IF; + return; + } + + if (kmemcheck_enabled) + n = kmemcheck_hide_all(); + else + n = kmemcheck_show_all(); + + if (n == 0) + return; + + --data->balance; + + data->n_addrs = 0; + + if (!(data->flags & X86_EFLAGS_TF)) + regs->flags &= ~X86_EFLAGS_TF; + if (data->flags & X86_EFLAGS_IF) + regs->flags |= X86_EFLAGS_IF; +} + +void kmemcheck_show_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + unsigned long address; + pte_t *pte; + unsigned int level; + + address = (unsigned long) page_address(&p[i]); + pte = lookup_address(address, &level); + BUG_ON(!pte); + BUG_ON(level != PG_LEVEL_4K); + + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); + __flush_tlb_one(address); + } +} + +bool kmemcheck_page_is_tracked(struct page *p) +{ + /* This will also check the "hidden" flag of the PTE. */ + return kmemcheck_pte_lookup((unsigned long) page_address(p)); +} + +void kmemcheck_hide_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + unsigned long address; + pte_t *pte; + unsigned int level; + + address = (unsigned long) page_address(&p[i]); + pte = lookup_address(address, &level); + BUG_ON(!pte); + BUG_ON(level != PG_LEVEL_4K); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); + __flush_tlb_one(address); + } +} + +/* Access may NOT cross page boundary */ +static void kmemcheck_read_strict(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + void *shadow; + enum kmemcheck_shadow status; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return; + + kmemcheck_save_addr(addr); + status = kmemcheck_shadow_test(shadow, size); + if (status == KMEMCHECK_SHADOW_INITIALIZED) + return; + + if (kmemcheck_enabled) + kmemcheck_error_save(status, addr, size, regs); + + if (kmemcheck_enabled == 2) + kmemcheck_enabled = 0; + + /* Don't warn about it again. */ + kmemcheck_shadow_set(shadow, size); +} + +/* Access may cross page boundary */ +static void kmemcheck_read(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + unsigned long page = addr & PAGE_MASK; + unsigned long next_addr = addr + size - 1; + unsigned long next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + kmemcheck_read_strict(regs, addr, size); + return; + } + + /* + * What we do is basically to split the access across the + * two pages and handle each part separately. Yes, this means + * that we may now see reads that are 3 + 5 bytes, for + * example (and if both are uninitialized, there will be two + * reports), but it makes the code a lot simpler. + */ + kmemcheck_read_strict(regs, addr, next_page - addr); + kmemcheck_read_strict(regs, next_page, next_addr - next_page); +} + +static void kmemcheck_write_strict(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + void *shadow; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return; + + kmemcheck_save_addr(addr); + kmemcheck_shadow_set(shadow, size); +} + +static void kmemcheck_write(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + unsigned long page = addr & PAGE_MASK; + unsigned long next_addr = addr + size - 1; + unsigned long next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + kmemcheck_write_strict(regs, addr, size); + return; + } + + /* See comment in kmemcheck_read(). */ + kmemcheck_write_strict(regs, addr, next_page - addr); + kmemcheck_write_strict(regs, next_page, next_addr - next_page); +} + +/* + * Copying is hard. We have two addresses, each of which may be split across + * a page (and each page will have different shadow addresses). + */ +static void kmemcheck_copy(struct pt_regs *regs, + unsigned long src_addr, unsigned long dst_addr, unsigned int size) +{ + uint8_t shadow[8]; + enum kmemcheck_shadow status; + + unsigned long page; + unsigned long next_addr; + unsigned long next_page; + + uint8_t *x; + unsigned int i; + unsigned int n; + + BUG_ON(size > sizeof(shadow)); + + page = src_addr & PAGE_MASK; + next_addr = src_addr + size - 1; + next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + /* Same page */ + x = kmemcheck_shadow_lookup(src_addr); + if (x) { + kmemcheck_save_addr(src_addr); + for (i = 0; i < size; ++i) + shadow[i] = x[i]; + } else { + for (i = 0; i < size; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } else { + n = next_page - src_addr; + BUG_ON(n > sizeof(shadow)); + + /* First page */ + x = kmemcheck_shadow_lookup(src_addr); + if (x) { + kmemcheck_save_addr(src_addr); + for (i = 0; i < n; ++i) + shadow[i] = x[i]; + } else { + /* Not tracked */ + for (i = 0; i < n; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + + /* Second page */ + x = kmemcheck_shadow_lookup(next_page); + if (x) { + kmemcheck_save_addr(next_page); + for (i = n; i < size; ++i) + shadow[i] = x[i - n]; + } else { + /* Not tracked */ + for (i = n; i < size; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + + page = dst_addr & PAGE_MASK; + next_addr = dst_addr + size - 1; + next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + /* Same page */ + x = kmemcheck_shadow_lookup(dst_addr); + if (x) { + kmemcheck_save_addr(dst_addr); + for (i = 0; i < size; ++i) { + x[i] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + } else { + n = next_page - dst_addr; + BUG_ON(n > sizeof(shadow)); + + /* First page */ + x = kmemcheck_shadow_lookup(dst_addr); + if (x) { + kmemcheck_save_addr(dst_addr); + for (i = 0; i < n; ++i) { + x[i] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + + /* Second page */ + x = kmemcheck_shadow_lookup(next_page); + if (x) { + kmemcheck_save_addr(next_page); + for (i = n; i < size; ++i) { + x[i - n] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + } + + status = kmemcheck_shadow_test(shadow, size); + if (status == KMEMCHECK_SHADOW_INITIALIZED) + return; + + if (kmemcheck_enabled) + kmemcheck_error_save(status, src_addr, size, regs); + + if (kmemcheck_enabled == 2) + kmemcheck_enabled = 0; +} + +enum kmemcheck_method { + KMEMCHECK_READ, + KMEMCHECK_WRITE, +}; + +static void kmemcheck_access(struct pt_regs *regs, + unsigned long fallback_address, enum kmemcheck_method fallback_method) +{ + const uint8_t *insn; + const uint8_t *insn_primary; + unsigned int size; + + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + /* Recursive fault -- ouch. */ + if (data->busy) { + kmemcheck_show_addr(fallback_address); + kmemcheck_error_save_bug(regs); + return; + } + + data->busy = true; + + insn = (const uint8_t *) regs->ip; + insn_primary = kmemcheck_opcode_get_primary(insn); + + kmemcheck_opcode_decode(insn, &size); + + switch (insn_primary[0]) { +#ifdef CONFIG_KMEMCHECK_BITOPS_OK + /* AND, OR, XOR */ + /* + * Unfortunately, these instructions have to be excluded from + * our regular checking since they access only some (and not + * all) bits. This clears out "bogus" bitfield-access warnings. + */ + case 0x80: + case 0x81: + case 0x82: + case 0x83: + switch ((insn_primary[1] >> 3) & 7) { + /* OR */ + case 1: + /* AND */ + case 4: + /* XOR */ + case 6: + kmemcheck_write(regs, fallback_address, size); + goto out; + + /* ADD */ + case 0: + /* ADC */ + case 2: + /* SBB */ + case 3: + /* SUB */ + case 5: + /* CMP */ + case 7: + break; + } + break; +#endif + + /* MOVS, MOVSB, MOVSW, MOVSD */ + case 0xa4: + case 0xa5: + /* + * These instructions are special because they take two + * addresses, but we only get one page fault. + */ + kmemcheck_copy(regs, regs->si, regs->di, size); + goto out; + + /* CMPS, CMPSB, CMPSW, CMPSD */ + case 0xa6: + case 0xa7: + kmemcheck_read(regs, regs->si, size); + kmemcheck_read(regs, regs->di, size); + goto out; + } + + /* + * If the opcode isn't special in any way, we use the data from the + * page fault handler to determine the address and type of memory + * access. + */ + switch (fallback_method) { + case KMEMCHECK_READ: + kmemcheck_read(regs, fallback_address, size); + goto out; + case KMEMCHECK_WRITE: + kmemcheck_write(regs, fallback_address, size); + goto out; + } + +out: + data->busy = false; +} + +bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + pte_t *pte; + unsigned int level; + + /* + * XXX: Is it safe to assume that memory accesses from virtual 86 + * mode or non-kernel code segments will _never_ access kernel + * memory (e.g. tracked pages)? For now, we need this to avoid + * invoking kmemcheck for PnP BIOS calls. + */ + if (regs->flags & X86_VM_MASK) + return false; + if (regs->cs != __KERNEL_CS) + return false; + + pte = lookup_address(address, &level); + if (!pte) + return false; + if (level != PG_LEVEL_4K) + return false; + if (!pte_hidden(*pte)) + return false; + + if (error_code & 2) + kmemcheck_access(regs, address, KMEMCHECK_WRITE); + else + kmemcheck_access(regs, address, KMEMCHECK_READ); + + kmemcheck_show(regs); + return true; +} + +bool kmemcheck_trap(struct pt_regs *regs) +{ + if (!kmemcheck_active(regs)) + return false; + + /* We're done. */ + kmemcheck_hide(regs); + return true; +} diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 00000000000..a4100b6e783 --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c @@ -0,0 +1,101 @@ +#include + +#include "opcode.h" + +static bool opcode_is_prefix(uint8_t b) +{ + return + /* Group 1 */ + b == 0xf0 || b == 0xf2 || b == 0xf3 + /* Group 2 */ + || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 + || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e + /* Group 3 */ + || b == 0x66 + /* Group 4 */ + || b == 0x67; +} + +static bool opcode_is_rex_prefix(uint8_t b) +{ + return (b & 0xf0) == 0x40; +} + +#define REX_W (1 << 3) + +/* + * This is a VERY crude opcode decoder. We only need to find the size of the + * load/store that caused our #PF and this should work for all the opcodes + * that we care about. Moreover, the ones who invented this instruction set + * should be shot. + */ +void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) +{ + /* Default operand size */ + int operand_size_override = 4; + + /* prefixes */ + for (; opcode_is_prefix(*op); ++op) { + if (*op == 0x66) + operand_size_override = 2; + } + +#ifdef CONFIG_X86_64 + /* REX prefix */ + if (opcode_is_rex_prefix(*op)) { + uint8_t rex = *op; + + ++op; + if (rex & REX_W) { + switch (*op) { + case 0x63: + *size = 4; + return; + case 0x0f: + ++op; + + switch (*op) { + case 0xb6: + case 0xbe: + *size = 1; + return; + case 0xb7: + case 0xbf: + *size = 2; + return; + } + + break; + } + + *size = 8; + return; + } + } +#endif + + /* escape opcode */ + if (*op == 0x0f) { + ++op; + + /* + * This is move with zero-extend and sign-extend, respectively; + * we don't have to think about 0xb6/0xbe, because this is + * already handled in the conditional below. + */ + if (*op == 0xb7 || *op == 0xbf) + operand_size_override = 2; + } + + *size = (*op & 1) ? operand_size_override : 1; +} + +const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) +{ + /* skip prefixes */ + while (opcode_is_prefix(*op)) + ++op; + if (opcode_is_rex_prefix(*op)) + ++op; + return op; +} diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 00000000000..6956aad66b5 --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h @@ -0,0 +1,9 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H +#define ARCH__X86__MM__KMEMCHECK__OPCODE_H + +#include + +void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); +const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); + +#endif diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 00000000000..4ead26eeaf9 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c @@ -0,0 +1,22 @@ +#include + +#include + +#include "pte.h" + +pte_t *kmemcheck_pte_lookup(unsigned long address) +{ + pte_t *pte; + unsigned int level; + + pte = lookup_address(address, &level); + if (!pte) + return NULL; + if (level != PG_LEVEL_4K) + return NULL; + if (!pte_hidden(*pte)) + return NULL; + + return pte; +} + diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 00000000000..9f596645649 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h @@ -0,0 +1,10 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H +#define ARCH__X86__MM__KMEMCHECK__PTE_H + +#include + +#include + +pte_t *kmemcheck_pte_lookup(unsigned long address); + +#endif diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 00000000000..5544d360087 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c @@ -0,0 +1,153 @@ +#include +#include +#include + +#include +#include + +#include "pte.h" +#include "shadow.h" + +/* + * Return the shadow address for the given address. Returns NULL if the + * address is not tracked. + * + * We need to be extremely careful not to follow any invalid pointers, + * because this function can be called for *any* possible address. + */ +void *kmemcheck_shadow_lookup(unsigned long address) +{ + pte_t *pte; + struct page *page; + + if (!virt_addr_valid(address)) + return NULL; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return NULL; + + page = virt_to_page(address); + if (!page->shadow) + return NULL; + return page->shadow + (address & (PAGE_SIZE - 1)); +} + +static void mark_shadow(void *address, unsigned int n, + enum kmemcheck_shadow status) +{ + unsigned long addr = (unsigned long) address; + unsigned long last_addr = addr + n - 1; + unsigned long page = addr & PAGE_MASK; + unsigned long last_page = last_addr & PAGE_MASK; + unsigned int first_n; + void *shadow; + + /* If the memory range crosses a page boundary, stop there. */ + if (page == last_page) + first_n = n; + else + first_n = page + PAGE_SIZE - addr; + + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, first_n); + + addr += first_n; + n -= first_n; + + /* Do full-page memset()s. */ + while (n >= PAGE_SIZE) { + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, PAGE_SIZE); + + addr += PAGE_SIZE; + n -= PAGE_SIZE; + } + + /* Do the remaining page, if any. */ + if (n > 0) { + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, n); + } +} + +void kmemcheck_mark_unallocated(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); +} + +void kmemcheck_mark_uninitialized(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); +} + +/* + * Fill the shadow memory of the given address such that the memory at that + * address is marked as being initialized. + */ +void kmemcheck_mark_initialized(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); +} +EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); + +void kmemcheck_mark_freed(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); +} + +void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); +} + +void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); +} + +enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) +{ + uint8_t *x; + unsigned int i; + + x = shadow; + +#ifdef CONFIG_KMEMCHECK_PARTIAL_OK + /* + * Make sure _some_ bytes are initialized. Gcc frequently generates + * code to access neighboring bytes. + */ + for (i = 0; i < size; ++i) { + if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) + return x[i]; + } +#else + /* All bytes must be initialized. */ + for (i = 0; i < size; ++i) { + if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) + return x[i]; + } +#endif + + return x[0]; +} + +void kmemcheck_shadow_set(void *shadow, unsigned int size) +{ + uint8_t *x; + unsigned int i; + + x = shadow; + for (i = 0; i < size; ++i) + x[i] = KMEMCHECK_SHADOW_INITIALIZED; +} diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 00000000000..af46d9ab9d8 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h @@ -0,0 +1,16 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H +#define ARCH__X86__MM__KMEMCHECK__SHADOW_H + +enum kmemcheck_shadow { + KMEMCHECK_SHADOW_UNALLOCATED, + KMEMCHECK_SHADOW_UNINITIALIZED, + KMEMCHECK_SHADOW_INITIALIZED, + KMEMCHECK_SHADOW_FREED, +}; + +void *kmemcheck_shadow_lookup(unsigned long address); + +enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); +void kmemcheck_shadow_set(void *shadow, unsigned int size); + +#endif diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h new file mode 100644 index 00000000000..39480c91b2f --- /dev/null +++ b/include/linux/kmemcheck.h @@ -0,0 +1,17 @@ +#ifndef LINUX_KMEMCHECK_H +#define LINUX_KMEMCHECK_H + +#include +#include + +#ifdef CONFIG_KMEMCHECK +extern int kmemcheck_enabled; + +int kmemcheck_show_addr(unsigned long address); +int kmemcheck_hide_addr(unsigned long address); +#else +#define kmemcheck_enabled 0 + +#endif /* CONFIG_KMEMCHECK */ + +#endif /* LINUX_KMEMCHECK_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e80e26ecf2..0042090a4d7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -98,6 +98,14 @@ struct page { #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS unsigned long debug_flags; /* Use atomic bitops on this */ #endif + +#ifdef CONFIG_KMEMCHECK + /* + * kmemcheck wants to track the status of each byte in a page; this + * is a pointer to such a status block. NULL if not tracked. + */ + void *shadow; +#endif }; /* diff --git a/init/main.c b/init/main.c index 5616661eac0..e3c335e47cd 100644 --- a/init/main.c +++ b/init/main.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ce664f98e3f..9ef80bba350 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -959,6 +960,17 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_KMEMCHECK + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kmemcheck", + .data = &kmemcheck_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt -- cgit v1.2.3-70-g09d2 From 46e443283891dbd45fba7fa037baab831f5d8f3f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Jun 2009 20:00:54 -0700 Subject: x86: atomic_32.h: Fix kernel-doc warnings Fix kernel-doc warnings in atomic_32.h: Warning(arch/x86/include/asm/atomic_32.h:265): No description found for parameter 'ptr' Warning(arch/x86/include/asm/atomic_32.h:265): Excess function parameter 'v' description in '__atomic64_read' Warning(arch/x86/include/asm/atomic_32.h:305): Excess function parameter 'old_val' description in 'atomic64_xchg' Signed-off-by: Randy Dunlap LKML-Reference: <4A3467E6.6010907@oracle.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic_32.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index aff9f1fcdcd..a6a4b357076 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -257,7 +257,7 @@ typedef struct { /** * atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t + * @ptr: pointer of type atomic64_t * * Atomically reads the value of @v. * Doesn't imply a read memory barrier. @@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, * atomic64_xchg - xchg atomic64 variable * @ptr: pointer to type atomic64_t * @new_val: value to assign - * @old_val: old value that was there * * Atomically xchgs the value of @ptr to @new_val and returns * the old value. -- cgit v1.2.3-70-g09d2 From f85612967c93b67b10dd240e3e8bf8a0eee9def7 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 4 Apr 2008 00:53:23 +0200 Subject: x86: add hooks for kmemcheck The hooks that we modify are: - Page fault handler (to handle kmemcheck faults) - Debug exception handler (to hide pages after single-stepping the instruction that caused the page fault) Also redefine memset() to use the optimized version if kmemcheck is enabled. (Thanks to Pekka Enberg for minimizing the impact on the page fault handler.) As kmemcheck doesn't handle MMX/SSE instructions (yet), we also disable the optimized xor code, and rely instead on the generic C implementation in order to avoid false-positive warnings. Signed-off-by: Vegard Nossum [whitespace fixlet] Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/string_32.h | 8 ++++++++ arch/x86/include/asm/string_64.h | 8 ++++++++ arch/x86/include/asm/xor.h | 5 +++++ arch/x86/kernel/cpu/intel.c | 23 +++++++++++++++++++++++ arch/x86/kernel/traps.c | 5 +++++ arch/x86/mm/fault.c | 18 +++++++++++++++--- arch/x86/mm/init.c | 2 +- arch/x86/mm/init_32.c | 2 +- 8 files changed, 66 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 0e0e3ba827f..c86f452256d 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) * No 3D Now! */ +#ifndef CONFIG_KMEMCHECK #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#else +/* + * kmemcheck becomes very happy if we use the REP instructions unconditionally, + * because it means that we know both memory operands in advance. + */ +#define memcpy(t, f, n) __memcpy((t), (f), (n)) +#endif #endif diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 2afe164bf1e..19e2c468fc2 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +#ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 extern void *memcpy(void *to, const void *from, size_t len); #else @@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len); __ret; \ }) #endif +#else +/* + * kmemcheck becomes very happy if we use the REP instructions unconditionally, + * because it means that we know both memory operands in advance. + */ +#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) +#endif #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 11b3bb86e17..7fcf6f3dbcc 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,5 +1,10 @@ +#ifdef CONFIG_KMEMCHECK +/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ +# include +#else #ifdef CONFIG_X86_32 # include "xor_32.h" #else # include "xor_64.h" #endif +#endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index daed39ba261..3260ab04499 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); + +#ifdef CONFIG_KMEMCHECK + /* + * P4s have a "fast strings" feature which causes single- + * stepping REP instructions to only generate a #DB on + * cache-line boundaries. + * + * Ingo Molnar reported a Pentium D (model 6) and a Xeon + * (model 2) with the same problem. + */ + if (c->x86 == 15) { + u64 misc_enable; + + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + + if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { + printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); + + misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + } + } +#endif } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 07d60c870ce..e7a28e6aa4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -45,6 +45,7 @@ #include #endif +#include #include #include #include @@ -534,6 +535,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) get_debugreg(condition, 6); + /* Catch kmemcheck conditions first of all! */ + if (condition & DR_STEP && kmemcheck_trap(regs)) + return; + /* * The processor cleared BTF, so don't mark that we need it set. */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c6acc632637..baa0e86adfb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -14,6 +14,7 @@ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ +#include /* kmemcheck_*(), ... */ /* * Page fault error code bits: @@ -956,6 +957,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) /* Get the faulting address: */ address = read_cr2(); + /* + * Detect and handle instructions that would cause a page fault for + * both a tracked kernel page and a userspace page. + */ + if (kmemcheck_active(regs)) + kmemcheck_hide(regs); + if (unlikely(kmmio_fault(regs, address))) return; @@ -973,9 +981,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * protection error (error_code & 9) == 0. */ if (unlikely(fault_in_kernel_space(address))) { - if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && - vmalloc_fault(address) >= 0) - return; + if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { + if (vmalloc_fault(address) >= 0) + return; + + if (kmemcheck_fault(regs, address, error_code)) + return; + } /* Can handle a stale RO->RW TLB: */ if (spurious_fault(error_code, address)) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 34c1bfb64f1..f53b57e4086 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -213,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, if (!after_bootmem) init_gbpages(); -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 949708d7a48..80cafd76a2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -111,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) pte_t *page_table = NULL; if (after_bootmem) { -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); #endif if (!page_table) -- cgit v1.2.3-70-g09d2 From d7002857dee6e9a3ce1f78d23f37caba106b29c5 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 20 Jul 2008 10:44:54 +0200 Subject: kmemcheck: add DMA hooks This patch hooks into the DMA API to prevent the reporting of the false positives that would otherwise be reported when memory is accessed that is also used directly by devices. [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/dma-mapping.h | 2 ++ include/linux/kmemcheck.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f82fdc412c6..d57d0c1857b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -6,6 +6,7 @@ * Documentation/DMA-API.txt for documentation. */ +#include #include #include #include @@ -60,6 +61,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + kmemcheck_mark_initialized(ptr, size); addr = ops->map_page(hwdev, virt_to_page(ptr), (unsigned long)ptr & ~PAGE_MASK, size, dir, NULL); diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 5b65f4ebead..71f21ae33d1 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -59,6 +59,22 @@ static inline bool kmemcheck_page_is_tracked(struct page *p) { return false; } + +static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) +{ +} + +static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) +{ +} + +static inline void kmemcheck_mark_initialized(void *address, unsigned int n) +{ +} + +static inline void kmemcheck_mark_freed(void *address, unsigned int n) +{ +} #endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3-70-g09d2 From 9b5cab31897e9e89e36c0c2a89b16b93ff1a971a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 21 Feb 2009 13:52:37 +0100 Subject: kmemcheck: add hooks for page- and sg-dma-mappings This is needed for page allocator support to prevent false positives when accessing pages which are dma-mapped. [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index d57d0c1857b..b93405b228b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -89,8 +89,12 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg, { struct dma_map_ops *ops = get_dma_ops(hwdev); int ents; + struct scatterlist *s; + int i; BUG_ON(!valid_dma_direction(dir)); + for_each_sg(sg, s, nents, i) + kmemcheck_mark_initialized(sg_virt(s), s->length); ents = ops->map_sg(hwdev, sg, nents, dir, NULL); debug_dma_map_sg(hwdev, sg, nents, ents, dir); @@ -202,6 +206,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + kmemcheck_mark_initialized(page_address(page) + offset, size); addr = ops->map_page(dev, page, offset, size, dir, NULL); debug_dma_map_page(dev, page, offset, size, dir, addr, false); -- cgit v1.2.3-70-g09d2 From 0975904276552c8e201dad0ad31152ba8a21505a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Jun 2009 17:52:27 +0200 Subject: amd-iommu: disable IOMMU hardware on shutdown When the IOMMU stays enabled the BIOS may not be able to finish the machine shutdown properly. So disable the hardware on shutdown. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 2 ++ arch/x86/kernel/amd_iommu_init.c | 5 +++++ arch/x86/kernel/pci-dma.c | 2 ++ 3 files changed, 9 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 262e0282004..bdf96f119f0 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -29,9 +29,11 @@ extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_shutdown(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } +static inline void amd_iommu_shutdown(void) { } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 238989ec077..575ca46211b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1273,6 +1273,11 @@ free: goto out; } +void amd_iommu_shutdown(void) +{ + disable_iommus(); +} + /**************************************************************************** * * Early detect code. This code runs at IOMMU detection time in the DMA diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc825..328592fb604 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -290,6 +290,8 @@ static int __init pci_iommu_init(void) void pci_iommu_shutdown(void) { gart_iommu_shutdown(); + + amd_iommu_shutdown(); } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); -- cgit v1.2.3-70-g09d2 From b1eeab67682a5e397aecf172046b3a8bd4808ae4 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 25 Nov 2008 16:55:53 +0100 Subject: kmemcheck: add hooks for the page allocator This adds support for tracking the initializedness of memory that was allocated with the page allocator. Highmem requests are not tracked. Cc: Dave Hansen Acked-by: Pekka Enberg [build fix for !CONFIG_KMEMCHECK] Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/thread_info.h | 4 ++-- arch/x86/mm/kmemcheck/shadow.c | 8 +++++++ include/linux/gfp.h | 5 +++++ include/linux/kmemcheck.h | 35 ++++++++++++++++++++++++----- mm/kmemcheck.c | 45 +++++++++++++++++++++++++++----------- mm/page_alloc.c | 18 +++++++++++++++ mm/slab.c | 15 ++++++++----- mm/slub.c | 23 ++++++++++++++----- 8 files changed, 122 insertions(+), 31 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 602c769fc98..b0783520988 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -154,9 +154,9 @@ struct thread_info { /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) +#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #else -#define THREAD_FLAGS GFP_KERNEL +#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) #endif #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c index e7346d3873b..e773b6bd007 100644 --- a/arch/x86/mm/kmemcheck/shadow.c +++ b/arch/x86/mm/kmemcheck/shadow.c @@ -116,6 +116,14 @@ void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); } +void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); +} + enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) { uint8_t *x; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index daeaa8fe1bb..3885e7f7556 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -51,7 +51,12 @@ struct vm_area_struct; #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ + +#ifdef CONFIG_KMEMCHECK #define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ +#else +#define __GFP_NOTRACK ((__force gfp_t)0) +#endif /* * This may seem redundant, but it's a way of annotating false positives vs. diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 71f21ae33d1..093d23969b1 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -8,13 +8,15 @@ extern int kmemcheck_enabled; /* The slab-related functions. */ -void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, - struct page *page, int order); -void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order); +void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); +void kmemcheck_free_shadow(struct page *page, int order); void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, size_t size); void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); +void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, + gfp_t gfpflags); + void kmemcheck_show_pages(struct page *p, unsigned int n); void kmemcheck_hide_pages(struct page *p, unsigned int n); @@ -27,6 +29,7 @@ void kmemcheck_mark_freed(void *address, unsigned int n); void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); +void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); int kmemcheck_show_addr(unsigned long address); int kmemcheck_hide_addr(unsigned long address); @@ -34,13 +37,12 @@ int kmemcheck_hide_addr(unsigned long address); #define kmemcheck_enabled 0 static inline void -kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, - struct page *page, int order) +kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) { } static inline void -kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +kmemcheck_free_shadow(struct page *page, int order) { } @@ -55,6 +57,11 @@ static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, { } +static inline void kmemcheck_pagealloc_alloc(struct page *p, + unsigned int order, gfp_t gfpflags) +{ +} + static inline bool kmemcheck_page_is_tracked(struct page *p) { return false; @@ -75,6 +82,22 @@ static inline void kmemcheck_mark_initialized(void *address, unsigned int n) static inline void kmemcheck_mark_freed(void *address, unsigned int n) { } + +static inline void kmemcheck_mark_unallocated_pages(struct page *p, + unsigned int n) +{ +} + +static inline void kmemcheck_mark_uninitialized_pages(struct page *p, + unsigned int n) +{ +} + +static inline void kmemcheck_mark_initialized_pages(struct page *p, + unsigned int n) +{ +} + #endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index eaa41b80261..fd814fd6131 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -1,10 +1,10 @@ +#include #include #include #include #include -void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, - struct page *page, int order) +void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) { struct page *shadow; int pages; @@ -16,7 +16,7 @@ void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, * With kmemcheck enabled, we need to allocate a memory area for the * shadow bits as well. */ - shadow = alloc_pages_node(node, flags, order); + shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order); if (!shadow) { if (printk_ratelimit()) printk(KERN_ERR "kmemcheck: failed to allocate " @@ -33,23 +33,17 @@ void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, * the memory accesses. */ kmemcheck_hide_pages(page, pages); - - /* - * Objects from caches that have a constructor don't get - * cleared when they're allocated, so we need to do it here. - */ - if (s->ctor) - kmemcheck_mark_uninitialized_pages(page, pages); - else - kmemcheck_mark_unallocated_pages(page, pages); } -void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +void kmemcheck_free_shadow(struct page *page, int order) { struct page *shadow; int pages; int i; + if (!kmemcheck_page_is_tracked(page)) + return; + pages = 1 << order; kmemcheck_show_pages(page, pages); @@ -101,3 +95,28 @@ void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) kmemcheck_mark_freed(object, size); } + +void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, + gfp_t gfpflags) +{ + int pages; + + if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) + return; + + pages = 1 << order; + + /* + * NOTE: We choose to track GFP_ZERO pages too; in fact, they + * can become uninitialized by copying uninitialized memory + * into them. + */ + + /* XXX: Can use zone->node for node? */ + kmemcheck_alloc_shadow(page, order, gfpflags, -1); + + if (gfpflags & __GFP_ZERO) + kmemcheck_mark_initialized_pages(page, pages); + else + kmemcheck_mark_uninitialized_pages(page, pages); +} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 17d5f539a9a..0727896a88a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -546,6 +547,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int bad = 0; + kmemcheck_free_shadow(page, order); + for (i = 0 ; i < (1 << order) ; ++i) bad += free_pages_check(page + i); if (bad) @@ -994,6 +997,8 @@ static void free_hot_cold_page(struct page *page, int cold) struct per_cpu_pages *pcp; unsigned long flags; + kmemcheck_free_shadow(page, 0); + if (PageAnon(page)) page->mapping = NULL; if (free_pages_check(page)) @@ -1047,6 +1052,16 @@ void split_page(struct page *page, unsigned int order) VM_BUG_ON(PageCompound(page)); VM_BUG_ON(!page_count(page)); + +#ifdef CONFIG_KMEMCHECK + /* + * Split shadow pages too, because free(page[0]) would + * otherwise free the whole shadow. + */ + if (kmemcheck_page_is_tracked(page)) + split_page(virt_to_page(page[0].shadow), order); +#endif + for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } @@ -1667,7 +1682,10 @@ nopage: dump_stack(); show_mem(); } + return page; got_pg: + if (kmemcheck_enabled) + kmemcheck_pagealloc_alloc(page, order, gfp_mask); return page; } EXPORT_SYMBOL(__alloc_pages_internal); diff --git a/mm/slab.c b/mm/slab.c index 95b6c5eb40b..6a1ad0b9a94 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1612,7 +1612,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) if (cachep->flags & SLAB_RECLAIM_ACCOUNT) flags |= __GFP_RECLAIMABLE; - page = alloc_pages_node(nodeid, flags, cachep->gfporder); + page = alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); if (!page) return NULL; @@ -1626,8 +1626,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); - if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) - kmemcheck_alloc_shadow(cachep, flags, nodeid, page, cachep->gfporder); + if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { + kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); + + if (cachep->ctor) + kmemcheck_mark_uninitialized_pages(page, nr_pages); + else + kmemcheck_mark_unallocated_pages(page, nr_pages); + } return page_address(page); } @@ -1641,8 +1647,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; - if (kmemcheck_page_is_tracked(page)) - kmemcheck_free_shadow(cachep, page, cachep->gfporder); + kmemcheck_free_shadow(page, cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) sub_zone_page_state(page_zone(page), diff --git a/mm/slub.c b/mm/slub.c index 1cebaa747ad..898fb5047dc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1066,6 +1066,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node, { int order = oo_order(oo); + flags |= __GFP_NOTRACK; + if (node == -1) return alloc_pages(flags, order); else @@ -1097,7 +1099,18 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (kmemcheck_enabled && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { - kmemcheck_alloc_shadow(s, flags, node, page, compound_order(page)); + int pages = 1 << oo_order(oo); + + kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); + + /* + * Objects from caches that have a constructor don't get + * cleared when they're allocated, so we need to do it here. + */ + if (s->ctor) + kmemcheck_mark_uninitialized_pages(page, pages); + else + kmemcheck_mark_unallocated_pages(page, pages); } page->objects = oo_objects(oo); @@ -1173,8 +1186,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlubDebug(page); } - if (kmemcheck_page_is_tracked(page)) - kmemcheck_free_shadow(s, page, compound_order(page)); + kmemcheck_free_shadow(page, compound_order(page)); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? @@ -2734,9 +2746,10 @@ EXPORT_SYMBOL(__kmalloc); static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { - struct page *page = alloc_pages_node(node, flags | __GFP_COMP, - get_order(size)); + struct page *page; + flags |= __GFP_COMP | __GFP_NOTRACK; + page = alloc_pages_node(node, flags, get_order(size)); if (page) return page_address(page); else -- cgit v1.2.3-70-g09d2 From 9d31c5068b852deaf02ccfb4a1ed2b54f3b9358a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:05:01 -0800 Subject: x86: make _PAGE_HIDDEN conditional Only _PAGE_HIDDEN when CONFIG_KMEMCHECK is defined, otherwise set it to 0. Allows later cleanups. Signed-off-by: Jeremy Fitzhardinge [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/pgtable_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 9b5c92140aa..54cb697f490 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -41,13 +41,18 @@ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) -#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) #define __HAVE_ARCH_PTE_SPECIAL +#ifdef CONFIG_KMEMCHECK +#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) +#else +#define _PAGE_HIDDEN (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else -- cgit v1.2.3-70-g09d2 From eb63657e1314ae4af5e19a61db8dc1b6e935775a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:05:56 -0800 Subject: x86: unify pte_hidden Unify and demacro pte_hidden. Signed-off-by: Jeremy Fitzhardinge [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/include/asm/pgtable.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c5a08079ad5..3cc06e3fceb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -317,13 +317,9 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } -static inline int pte_hidden(pte_t x) +static inline int pte_hidden(pte_t pte) { -#ifdef CONFIG_KMEMCHECK - return pte_flags(x) & _PAGE_HIDDEN; -#else - return 0; -#endif + return pte_flags(pte) & _PAGE_HIDDEN; } static inline int pmd_present(pmd_t pmd) -- cgit v1.2.3-70-g09d2 From 3ff0141aa3a03ca3388b40b36167d0a37919f3fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 12:40:41 +0200 Subject: x86: Add NMI types for kmap_atomic Two new kmap_atomic slots for NMI context. And teach pte_offset_map() about NMI context. Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kmap_types.h | 4 +++- arch/x86/include/asm/pgtable_32.h | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index 5759c165a5c..ff00a44b7d0 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h @@ -21,7 +21,9 @@ D(9) KM_IRQ0, D(10) KM_IRQ1, D(11) KM_SOFTIRQ0, D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR +D(13) KM_NMI, +D(14) KM_NMI_PTE, +D(15) KM_TYPE_NR }; #undef D diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a..85464971bca 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,13 +49,14 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) +#define __KM_PTE (in_nmi() ? KM_NMI_PTE : KM_PTE0) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address))) #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) +#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) #else #define pte_offset_map(dir, address) \ -- cgit v1.2.3-70-g09d2 From 9974458e2f9a11dbd2f4bd14fab5a79af4907b41 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2009 21:45:16 +1000 Subject: perf_counter: Make set_perf_counter_pending() declaration common At present, every architecture that supports perf_counters has to declare set_perf_counter_pending() in its arch-specific headers. This consolidates the declarations into a single declaration in one common place, include/linux/perf_counter.h. On powerpc, we continue to provide a static inline definition of set_perf_counter_pending() in the powerpc hw_irq.h. Also, this removes from the x86 perf_counter.h the unused null definitions of {test,clear}_perf_counter_pending. Reported-by: Mike Frysinger Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <18998.13388.920691.523227@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 1 - arch/powerpc/include/asm/perf_counter.h | 2 ++ arch/x86/include/asm/perf_counter.h | 5 ----- include/linux/perf_counter.h | 1 + 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 53512374e1c..1974cf191b0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -163,7 +163,6 @@ static inline unsigned long test_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index cc7c887705b..b398a84edce 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -10,6 +10,8 @@ */ #include +#include + #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b..5fb33e160ea 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,11 +84,6 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -extern void set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023..eccae437fe3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_free_task(struct task_struct *task); +extern void set_perf_counter_pending(void); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); -- cgit v1.2.3-70-g09d2 From 0990b1c65729012a63e0eeca93aaaafea4e9a064 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 16:46:05 +0200 Subject: x86: Add NMI types for kmap_atomic, fix I just realized this has a kmap_atomic bug in... The below would fix it - but it's complicating this code some more. Alternatively I would have to introduce something like pte_offset_map_irq() which would make the irq/nmi detection and leave the regular code paths alone, however that would mean either duplicating the gup_fast() pagewalk or passing down a pte function pointer, which would only duplicate the gup_pte_range() bit, neither is really attractive ... Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kmap_types.h | 11 ++++++----- arch/x86/include/asm/pgtable_32.h | 5 ++++- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index ff00a44b7d0..f8661384619 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h @@ -19,11 +19,12 @@ D(7) KM_PTE0, D(8) KM_PTE1, D(9) KM_IRQ0, D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_NMI, -D(14) KM_NMI_PTE, -D(15) KM_TYPE_NR +D(11) KM_IRQ_PTE, +D(12) KM_SOFTIRQ0, +D(13) KM_SOFTIRQ1, +D(14) KM_NMI, +D(15) KM_NMI_PTE, +D(16) KM_TYPE_NR }; #undef D diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 85464971bca..01fd9461d32 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -49,7 +49,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #endif #if defined(CONFIG_HIGHPTE) -#define __KM_PTE (in_nmi() ? KM_NMI_PTE : KM_PTE0) +#define __KM_PTE \ + (in_nmi() ? KM_NMI_PTE : \ + in_irq() ? KM_IRQ_PTE : \ + KM_PTE0) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ pte_index((address))) -- cgit v1.2.3-70-g09d2 From 9e55e44e39798541ba39d57f4b569deb555ae1ce Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:22:15 +0900 Subject: x86, mce: unify mce.h There are 2 headers: arch/x86/include/asm/mce.h arch/x86/kernel/cpu/mcheck/mce.h and in the latter small header: #include This patch move all contents in the latter header into the former, and fix all files using the latter to include the former instead. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 36 +++++++++++++++++++++++++++-- arch/x86/kernel/cpu/mcheck/k7.c | 3 +-- arch/x86/kernel/cpu/mcheck/mce.c | 1 - arch/x86/kernel/cpu/mcheck/mce.h | 38 ------------------------------- arch/x86/kernel/cpu/mcheck/mce_intel.c | 3 +-- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 -- arch/x86/kernel/cpu/mcheck/non-fatal.c | 3 +-- arch/x86/kernel/cpu/mcheck/p4.c | 3 +-- arch/x86/kernel/cpu/mcheck/p5.c | 3 +-- arch/x86/kernel/cpu/mcheck/p6.c | 3 +-- arch/x86/kernel/cpu/mcheck/winchip.c | 3 +-- arch/x86/kernel/traps.c | 3 +-- 12 files changed, 42 insertions(+), 59 deletions(-) delete mode 100644 arch/x86/kernel/cpu/mcheck/mce.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 540a466e50f..aae6fe2112f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -102,10 +102,42 @@ struct mce_log { #ifdef __KERNEL__ +#include +#include +#include + extern int mce_disabled; -#include -#include +#ifdef CONFIG_X86_OLD_MCE +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +#endif + +#ifdef CONFIG_X86_ANCIENT_MCE +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); +extern int mce_p5_enable; +static inline int mce_p5_enabled(void) { return mce_p5_enable; } +static inline void enable_p5_mce(void) { mce_p5_enable = 1; } +#else +static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mce_p5_enabled(void) { return 0; } +static inline void enable_p5_mce(void) { } +#endif + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); + +#ifdef CONFIG_X86_OLD_MCE +extern int nr_mce_banks; +extern void intel_set_thermal_handler(void); +#else +static inline void intel_set_thermal_handler(void) { } +#endif + +void intel_init_thermal(struct cpuinfo_x86 *c); void mce_setup(struct mce *m); void mce_log(struct mce *m); diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e51042415..b945d5dbc60 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For AMD Athlon/Duron: */ static void k7_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4b62443b6e7..faedd776847 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -44,7 +44,6 @@ #include #include "mce-internal.h" -#include "mce.h" /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c..00000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include - -#ifdef CONFIG_X86_OLD_MCE -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } -#endif - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); - -#ifdef CONFIG_X86_OLD_MCE - -extern int nr_mce_banks; - -void intel_set_thermal_handler(void); - -#else - -static inline void intel_set_thermal_handler(void) { } - -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 2b011d2d857..475478b2088 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,10 +11,9 @@ #include #include #include +#include #include -#include "mce.h" - void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index f2ef6952c40..c548111d011 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -16,8 +16,6 @@ #include #include -#include "mce.h" - asmlinkage void smp_thermal_interrupt(void) { __u64 msr_val; diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f7..f5f2d6f71fb 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -17,10 +17,9 @@ #include #include +#include #include -#include "mce.h" - static int firstbank; #define MCE_RATE (15*HZ) /* timer rate is 15s */ diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d..8d3e40edd64 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -12,10 +12,9 @@ #include #include #include +#include #include -#include "mce.h" - /* as supported by the P4/Xeon family */ struct intel_mce_extended_msrs { u32 eax; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b..747853f9188 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* By default disabled */ int mce_p5_enable; diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e66745..01e4f817818 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -10,10 +10,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine Check Handler For PII/PIII */ static void intel_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090..54060f56597 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -9,10 +9,9 @@ #include #include +#include #include -#include "mce.h" - /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e1e27b7d43..71f9c74814d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -64,8 +65,6 @@ #include #include -#include "cpu/mcheck/mce.h" - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ -- cgit v1.2.3-70-g09d2 From c697836985e18d9c34897428ba563b13044a6dcd Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:22:49 +0900 Subject: x86, mce: make mce_disabled boolean The mce_disabled on 32bit is a tristate variable [1,0,-1], while 64bit version is boolean [0,1]. This patch makes mce_disabled always boolean, and use mce_p5_enabled to indicate the third state instead. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 8 +++----- arch/x86/kernel/cpu/mcheck/mce.c | 8 +++----- arch/x86/kernel/cpu/mcheck/p5.c | 12 +++++------- 3 files changed, 11 insertions(+), 17 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index aae6fe2112f..6568cdedcd8 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -107,6 +107,7 @@ struct mce_log { #include extern int mce_disabled; +extern int mce_p5_enabled; #ifdef CONFIG_X86_OLD_MCE void amd_mcheck_init(struct cpuinfo_x86 *c); @@ -117,14 +118,11 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_ANCIENT_MCE void intel_p5_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); -extern int mce_p5_enable; -static inline int mce_p5_enabled(void) { return mce_p5_enable; } -static inline void enable_p5_mce(void) { mce_p5_enable = 1; } +static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } #else static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline int mce_p5_enabled(void) { return 0; } -static inline void enable_p5_mce(void) { } +static inline void enable_p5_mce(void) {} #endif /* Call the installed machine check handler for this CPU setup. */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index faedd776847..6095e0296ab 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1286,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) return; switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (mce_p5_enabled()) - intel_p5_mcheck_init(c); + intel_p5_mcheck_init(c); break; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); @@ -2002,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { - if (mce_disabled == 1) + if (mce_disabled) return; switch (c->x86_vendor) { @@ -2032,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) static int __init mcheck_enable(char *str) { - mce_disabled = -1; + mce_p5_enabled = 1; return 1; } - __setup("mce", mcheck_enable); #endif /* CONFIG_X86_OLD_MCE */ diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 747853f9188..5c0e6533d9b 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -14,7 +14,7 @@ #include /* By default disabled */ -int mce_p5_enable; +int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) @@ -42,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; - /* Check for MCE support: */ - if (!cpu_has(c, X86_FEATURE_MCE)) + /* Default P5 to off as its often misconnected: */ + if (!mce_p5_enabled) return; -#ifdef CONFIG_X86_OLD_MCE - /* Default P5 to off as its often misconnected: */ - if (mce_disabled != -1) + /* Check for MCE support: */ + if (!cpu_has(c, X86_FEATURE_MCE)) return; -#endif machine_check_vector = pentium_machine_check; /* Make sure the vector pointer is visible before we enable MCEs: */ -- cgit v1.2.3-70-g09d2 From e8ce2c5ee826b3787202493effcd08d4b1e1e639 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:24:40 +0900 Subject: x86, mce: unify smp_thermal_interrupt, prepare Let them in same shape. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 25 ++++++++++++++----------- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 20 ++++++++++++++++++-- arch/x86/kernel/cpu/mcheck/p4.c | 10 ++++++---- 3 files changed, 38 insertions(+), 17 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6568cdedcd8..3bc827c0f40 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -110,6 +110,7 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_OLD_MCE +extern int nr_mce_banks; void amd_mcheck_init(struct cpuinfo_x86 *c); void intel_p4_mcheck_init(struct cpuinfo_x86 *c); void intel_p6_mcheck_init(struct cpuinfo_x86 *c); @@ -128,15 +129,6 @@ static inline void enable_p5_mce(void) {} /* Call the installed machine check handler for this CPU setup. */ extern void (*machine_check_vector)(struct pt_regs *, long error_code); -#ifdef CONFIG_X86_OLD_MCE -extern int nr_mce_banks; -extern void intel_set_thermal_handler(void); -#else -static inline void intel_set_thermal_handler(void) { } -#endif - -void intel_init_thermal(struct cpuinfo_x86 *c); - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); @@ -175,8 +167,6 @@ int mce_available(struct cpuinfo_x86 *c); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); -void mce_log_therm_throt_event(__u64 status); - extern atomic_t mce_entry; void do_machine_check(struct pt_regs *, long); @@ -205,5 +195,18 @@ void mcheck_init(struct cpuinfo_x86 *c); extern void (*mce_threshold_vector)(void); +/* + * Thermal handler + */ + +void intel_set_thermal_handler(void); +void intel_init_thermal(struct cpuinfo_x86 *c); + +#ifdef CONFIG_X86_NEW_MCE +void mce_log_therm_throt_event(__u64 status); +#else +static inline void mce_log_therm_throt_event(__u64 status) {} +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index a5232b2c4ca..922e3a482f6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -16,6 +16,14 @@ #include #include +static void unexpected_thermal_interrupt(void) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler: */ static void intel_thermal_interrupt(void) { __u64 msr_val; @@ -25,14 +33,22 @@ static void intel_thermal_interrupt(void) mce_log_therm_throt_event(msr_val); } +/* Thermal interrupt handler for this CPU setup: */ +static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; + asmlinkage void smp_thermal_interrupt(void) { - ack_APIC_irq(); exit_idle(); irq_enter(); - intel_thermal_interrupt(); inc_irq_stat(irq_thermal_count); + intel_thermal_interrupt(); irq_exit(); + ack_APIC_irq(); +} + +void intel_set_thermal_handler(void) +{ + vendor_thermal_interrupt = intel_thermal_interrupt; } /* diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index ddeed6e295a..75313f5b624 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -47,10 +48,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; - ack_APIC_irq(); - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & THERM_STATUS_PROCHOT); + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + mce_log_therm_throt_event(msr_val); } /* Thermal interrupt handler for this CPU setup: */ @@ -58,10 +58,12 @@ static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt; void smp_thermal_interrupt(struct pt_regs *regs) { + exit_idle(); irq_enter(); - vendor_thermal_interrupt(); inc_irq_stat(irq_thermal_count); + vendor_thermal_interrupt(); irq_exit(); + ack_APIC_irq(); } void intel_set_thermal_handler(void) -- cgit v1.2.3-70-g09d2 From 8363fc82d36c0886292e33925391dca93f03bd50 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:26:36 +0900 Subject: x86, mce: remove intel_set_thermal_handler() and make intel_thermal_interrupt() static. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 - arch/x86/kernel/cpu/mcheck/therm_throt.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 3bc827c0f40..365a594b41b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -199,7 +199,6 @@ extern void (*mce_threshold_vector)(void); * Thermal handler */ -void intel_set_thermal_handler(void); void intel_init_thermal(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_NEW_MCE diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7a508aaafce..7c7944cc515 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -202,7 +202,7 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ /* Thermal transition interrupt handler */ -void intel_thermal_interrupt(void) +static void intel_thermal_interrupt(void) { __u64 msr_val; @@ -231,11 +231,6 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } -void intel_set_thermal_handler(void) -{ - smp_thermal_vector = intel_thermal_interrupt; -} - void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -278,7 +273,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_THERM_INTERRUPT, l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - intel_set_thermal_handler(); + smp_thermal_vector = intel_thermal_interrupt; rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); -- cgit v1.2.3-70-g09d2 From 1149e7264528723b8c3b075a90386ceb2e07070a Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:27:13 +0900 Subject: x86, mce: remove therm_throt.h Now all symbols in the header are static. Remove the header. Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/therm_throt.h | 9 --------- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 - arch/x86/kernel/cpu/mcheck/p4.c | 1 - arch/x86/kernel/cpu/mcheck/therm_throt.c | 5 ++--- 4 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 arch/x86/include/asm/therm_throt.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee786..00000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _ASM_X86_THERM_THROT_H -#define _ASM_X86_THERM_THROT_H - -#include - -extern atomic_t therm_throt_en; -int therm_throt_process(int curr); - -#endif /* _ASM_X86_THERM_THROT_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 3b7a0572e2f..663a88e8b3c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -11,7 +11,6 @@ #include #include #include -#include /* * Support for Intel Correct Machine Check Interrupts. This allows diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 76c5f0305f9..4482aea9aa2 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7c7944cc515..bff8dd191dd 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include @@ -38,7 +37,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); +static atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ @@ -93,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -int therm_throt_process(int curr) +static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); -- cgit v1.2.3-70-g09d2 From 58995d2d58e8e555bc92582abe7554921deea3aa Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 15 Jun 2009 17:27:47 +0900 Subject: x86, mce: mce.h cleanup Reorder definitions. - static inline dummy mcheck_init() for !CONFIG_X86_MCE - gather defs for exception, threshold handler Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 365a594b41b..5cdd8d100ec 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -109,6 +109,12 @@ struct mce_log { extern int mce_disabled; extern int mce_p5_enabled; +#ifdef CONFIG_X86_MCE +void mcheck_init(struct cpuinfo_x86 *c); +#else +static inline void mcheck_init(struct cpuinfo_x86 *c) {} +#endif + #ifdef CONFIG_X86_OLD_MCE extern int nr_mce_banks; void amd_mcheck_init(struct cpuinfo_x86 *c); @@ -126,13 +132,9 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); -extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* * To support more than 128 would need to escape the predefined @@ -169,8 +171,6 @@ DECLARE_PER_CPU(unsigned, mce_poll_count); extern atomic_t mce_entry; -void do_machine_check(struct pt_regs *, long); - typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); @@ -187,13 +187,20 @@ void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); extern struct file_operations mce_chrdev_ops; -#ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); -#else -#define mcheck_init(c) do { } while (0) -#endif +/* + * Exception handler + */ + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); +void do_machine_check(struct pt_regs *, long); + +/* + * Threshold handler + */ extern void (*mce_threshold_vector)(void); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); /* * Thermal handler -- cgit v1.2.3-70-g09d2 From 08604bd9935dc98fb62ef61d5b7baa7ccc10f8c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jun 2009 15:31:12 -0700 Subject: time: move PIT_TICK_RATE to linux/timex.h PIT_TICK_RATE is currently defined in four architectures, but in three different places. While linux/timex.h is not the perfect place for it, it is still a reasonable replacement for those drivers that traditionally use asm/timex.h to get CLOCK_TICK_RATE and expect it to be the PIT frequency. Note that for Alpha, the actual value changed from 1193182UL to 1193180UL. This is unlikely to make a difference, and probably can only improve accuracy. There was a discussion on the correct value of CLOCK_TICK_RATE a few years ago, after which every existing instance was getting changed to 1193182. According to the specification, it should be 1193181.818181... Signed-off-by: Arnd Bergmann Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: john stultz Cc: Dmitry Torokhov Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/8253pit.h | 7 ------- arch/alpha/kernel/sys_ruffian.c | 1 + arch/mips/include/asm/i8253.h | 2 -- arch/powerpc/include/asm/8253pit.h | 7 ------- arch/x86/include/asm/timex.h | 4 +--- arch/x86/kernel/i8253.c | 1 + arch/x86/kernel/tsc.c | 1 + drivers/clocksource/acpi_pm.c | 1 + drivers/input/joystick/analog.c | 2 +- drivers/input/misc/pcspkr.c | 1 + include/linux/timex.h | 3 +++ sound/drivers/pcsp/pcsp.h | 1 + sound/oss/pas2_pcm.c | 2 +- 13 files changed, 12 insertions(+), 21 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/alpha/include/asm/8253pit.h b/arch/alpha/include/asm/8253pit.h index fef5c1450e4..a71c9c1455a 100644 --- a/arch/alpha/include/asm/8253pit.h +++ b/arch/alpha/include/asm/8253pit.h @@ -1,10 +1,3 @@ /* * 8253/8254 Programmable Interval Timer */ - -#ifndef _8253PIT_H -#define _8253PIT_H - -#define PIT_TICK_RATE 1193180UL - -#endif diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index f15a329b601..d9f9cfeb993 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h index 5dabc870b32..032ca73f181 100644 --- a/arch/mips/include/asm/i8253.h +++ b/arch/mips/include/asm/i8253.h @@ -12,8 +12,6 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -#define PIT_TICK_RATE 1193182UL - extern spinlock_t i8253_lock; extern void setup_pit_timer(void); diff --git a/arch/powerpc/include/asm/8253pit.h b/arch/powerpc/include/asm/8253pit.h index b70d6e53b30..a71c9c1455a 100644 --- a/arch/powerpc/include/asm/8253pit.h +++ b/arch/powerpc/include/asm/8253pit.h @@ -1,10 +1,3 @@ -#ifndef _ASM_POWERPC_8253PIT_H -#define _ASM_POWERPC_8253PIT_H - /* * 8253/8254 Programmable Interval Timer */ - -#define PIT_TICK_RATE 1193182UL - -#endif /* _ASM_POWERPC_8253PIT_H */ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index b5c9d45c981..1375cfc9396 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -4,9 +4,7 @@ #include #include -/* The PIT ticks at this frequency (in HZ): */ -#define PIT_TICK_RATE 1193182 - +/* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c2e0bb0890d..5cf36c053ac 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3e1c057e98f..ae3180c506a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 40bd8c61c7d..72a633a6ec9 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 356b3a25efa..1c0b529c06a 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #define DRIVER_DESC "Analog joystick and gamepad driver" diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c index d6a30cee7bc..6d67af5387a 100644 --- a/drivers/input/misc/pcspkr.c +++ b/drivers/input/misc/pcspkr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include MODULE_AUTHOR("Vojtech Pavlik "); diff --git a/include/linux/timex.h b/include/linux/timex.h index 9910e3bd5b3..e6967d10d9e 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -280,6 +280,9 @@ extern int do_adjtimex(struct timex *); int read_current_timer(unsigned long *timer_val); +/* The clock frequency of the i8253/i8254 PIT */ +#define PIT_TICK_RATE 1193182ul + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ diff --git a/sound/drivers/pcsp/pcsp.h b/sound/drivers/pcsp/pcsp.h index cdef2664218..174dd2ff0f2 100644 --- a/sound/drivers/pcsp/pcsp.h +++ b/sound/drivers/pcsp/pcsp.h @@ -10,6 +10,7 @@ #define __PCSP_H__ #include +#include #if defined(CONFIG_MIPS) || defined(CONFIG_X86) /* Use the global PIT lock ! */ #include diff --git a/sound/oss/pas2_pcm.c b/sound/oss/pas2_pcm.c index 36c3ea62086..8f7d175767a 100644 --- a/sound/oss/pas2_pcm.c +++ b/sound/oss/pas2_pcm.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include "sound_config.h" #include "pas2.h" -- cgit v1.2.3-70-g09d2 From e4c9dd0fbad60c098a026e9b06d9de1bc98c5e89 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 16 Jun 2009 15:33:47 -0700 Subject: kmap_types: make most arches use generic header file Convert most arches to use asm-generic/kmap_types.h. Move the KM_FENCE_ macro additions into asm-generic/kmap_types.h, controlled by __WITH_KM_FENCE from each arch's kmap_types.h file. Would be nice to be able to add custom KM_types per arch, but I don't yet see a nice, clean way to do that. Built on x86_64, i386, mips, sparc, alpha(tonyb), powerpc(tonyb), and 68k(tonyb). Note: avr32 should be able to remove KM_PTE2 (since it's not used) and then just use the generic kmap_types.h file. Get avr32 maintainer approval. Signed-off-by: Randy Dunlap Cc: Acked-by: Mike Frysinger Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Bryan Wu Cc: Mikael Starvik Cc: Hirokazu Takata Cc: "Luck Tony" Cc: Geert Uytterhoeven Cc: Ralf Baechle Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Paul Mundt Cc: "David S. Miller" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Arnd Bergmann Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/kmap_types.h | 24 +++--------------------- arch/blackfin/include/asm/kmap_types.h | 17 +---------------- arch/cris/include/asm/kmap_types.h | 17 +---------------- arch/h8300/include/asm/kmap_types.h | 17 +---------------- arch/ia64/include/asm/kmap_types.h | 24 +++--------------------- arch/m32r/include/asm/kmap_types.h | 23 +++-------------------- arch/m68k/include/asm/kmap_types.h | 17 +---------------- arch/microblaze/include/asm/kmap_types.h | 25 +------------------------ arch/mips/include/asm/kmap_types.h | 24 +++--------------------- arch/mn10300/include/asm/kmap_types.h | 27 +-------------------------- arch/parisc/include/asm/kmap_types.h | 24 +++--------------------- arch/s390/include/asm/kmap_types.h | 17 +---------------- arch/sh/include/asm/kmap_types.h | 24 +++--------------------- arch/sparc/include/asm/kmap_types.h | 17 +---------------- arch/x86/include/asm/kmap_types.h | 23 +++-------------------- arch/xtensa/include/asm/kmap_types.h | 27 +-------------------------- include/asm-generic/kmap_types.h | 2 +- 17 files changed, 31 insertions(+), 318 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h index 3e6735a34c5..a8d4ec8ea4b 100644 --- a/arch/alpha/include/asm/kmap_types.h +++ b/arch/alpha/include/asm/kmap_types.h @@ -3,30 +3,12 @@ /* Dummy header just to define km_type. */ - #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif diff --git a/arch/blackfin/include/asm/kmap_types.h b/arch/blackfin/include/asm/kmap_types.h index e215f710497..0a88622339e 100644 --- a/arch/blackfin/include/asm/kmap_types.h +++ b/arch/blackfin/include/asm/kmap_types.h @@ -1,21 +1,6 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif diff --git a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h index 492988cb907..d2d643c4ea5 100644 --- a/arch/cris/include/asm/kmap_types.h +++ b/arch/cris/include/asm/kmap_types.h @@ -5,21 +5,6 @@ * is actually used on cris. */ -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif diff --git a/arch/h8300/include/asm/kmap_types.h b/arch/h8300/include/asm/kmap_types.h index 1ec8a342712..be12a716011 100644 --- a/arch/h8300/include/asm/kmap_types.h +++ b/arch/h8300/include/asm/kmap_types.h @@ -1,21 +1,6 @@ #ifndef _ASM_H8300_KMAP_TYPES_H #define _ASM_H8300_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif diff --git a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h index 5d1658aa2b3..05d5f999610 100644 --- a/arch/ia64/include/asm/kmap_types.h +++ b/arch/ia64/include/asm/kmap_types.h @@ -1,30 +1,12 @@ #ifndef _ASM_IA64_KMAP_TYPES_H #define _ASM_IA64_KMAP_TYPES_H - #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif /* _ASM_IA64_KMAP_TYPES_H */ diff --git a/arch/m32r/include/asm/kmap_types.h b/arch/m32r/include/asm/kmap_types.h index fa94dc6410e..4cdb5e3a06b 100644 --- a/arch/m32r/include/asm/kmap_types.h +++ b/arch/m32r/include/asm/kmap_types.h @@ -2,28 +2,11 @@ #define __M32R_KMAP_TYPES_H #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif /* __M32R_KMAP_TYPES_H */ diff --git a/arch/m68k/include/asm/kmap_types.h b/arch/m68k/include/asm/kmap_types.h index c843c63d380..3413cc1390e 100644 --- a/arch/m68k/include/asm/kmap_types.h +++ b/arch/m68k/include/asm/kmap_types.h @@ -1,21 +1,6 @@ #ifndef __ASM_M68K_KMAP_TYPES_H #define __ASM_M68K_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif /* __ASM_M68K_KMAP_TYPES_H */ diff --git a/arch/microblaze/include/asm/kmap_types.h b/arch/microblaze/include/asm/kmap_types.h index 4d7e222f5dd..25975252d83 100644 --- a/arch/microblaze/include/asm/kmap_types.h +++ b/arch/microblaze/include/asm/kmap_types.h @@ -1,29 +1,6 @@ -/* - * Copyright (C) 2006 Atmark Techno, Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - #ifndef _ASM_MICROBLAZE_KMAP_TYPES_H #define _ASM_MICROBLAZE_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR, -}; +#include #endif /* _ASM_MICROBLAZE_KMAP_TYPES_H */ diff --git a/arch/mips/include/asm/kmap_types.h b/arch/mips/include/asm/kmap_types.h index 806aae3c533..58e91ed0388 100644 --- a/arch/mips/include/asm/kmap_types.h +++ b/arch/mips/include/asm/kmap_types.h @@ -1,30 +1,12 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H - #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif diff --git a/arch/mn10300/include/asm/kmap_types.h b/arch/mn10300/include/asm/kmap_types.h index 3398f9f3560..76d093b58d4 100644 --- a/arch/mn10300/include/asm/kmap_types.h +++ b/arch/mn10300/include/asm/kmap_types.h @@ -1,31 +1,6 @@ -/* MN10300 kmap_atomic() slot IDs - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif /* _ASM_KMAP_TYPES_H */ diff --git a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h index 806aae3c533..58e91ed0388 100644 --- a/arch/parisc/include/asm/kmap_types.h +++ b/arch/parisc/include/asm/kmap_types.h @@ -1,30 +1,12 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H - #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h index fd157464822..94ec3ee0798 100644 --- a/arch/s390/include/asm/kmap_types.h +++ b/arch/s390/include/asm/kmap_types.h @@ -2,22 +2,7 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif #endif /* __KERNEL__ */ diff --git a/arch/sh/include/asm/kmap_types.h b/arch/sh/include/asm/kmap_types.h index 84d565c696b..5962b08b6dd 100644 --- a/arch/sh/include/asm/kmap_types.h +++ b/arch/sh/include/asm/kmap_types.h @@ -3,30 +3,12 @@ /* Dummy header just to define km_type. */ - #ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif diff --git a/arch/sparc/include/asm/kmap_types.h b/arch/sparc/include/asm/kmap_types.h index 602f5e034f7..aad21745fbb 100644 --- a/arch/sparc/include/asm/kmap_types.h +++ b/arch/sparc/include/asm/kmap_types.h @@ -5,21 +5,6 @@ * is actually used on sparc. -DaveM */ -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index 5759c165a5c..9e00a731a7f 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h @@ -2,28 +2,11 @@ #define _ASM_X86_KMAP_TYPES_H #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) -# define D(n) __KM_FENCE_##n , -#else -# define D(n) +#define __WITH_KM_FENCE #endif -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_TYPE_NR -}; +#include -#undef D +#undef __WITH_KM_FENCE #endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/xtensa/include/asm/kmap_types.h b/arch/xtensa/include/asm/kmap_types.h index 9e822d2e3bc..11c687e527f 100644 --- a/arch/xtensa/include/asm/kmap_types.h +++ b/arch/xtensa/include/asm/kmap_types.h @@ -1,31 +1,6 @@ -/* - * include/asm-xtensa/kmap_types.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - #ifndef _XTENSA_KMAP_TYPES_H #define _XTENSA_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include #endif /* _XTENSA_KMAP_TYPES_H */ diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h index 58c33055c30..54e8b3d956b 100644 --- a/include/asm-generic/kmap_types.h +++ b/include/asm-generic/kmap_types.h @@ -1,7 +1,7 @@ #ifndef _ASM_GENERIC_KMAP_TYPES_H #define _ASM_GENERIC_KMAP_TYPES_H -#ifdef CONFIG_DEBUG_HIGHMEM +#ifdef __WITH_KM_FENCE # define D(n) __KM_FENCE_##n , #else # define D(n) -- cgit v1.2.3-70-g09d2 From 84599f8a59e77699f18f06948cea171a349a3f0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jun 2009 12:34:17 -0700 Subject: sched, x86: Fix cpufreq + sched_clock() TSC scaling For freqency dependent TSCs we only scale the cycles, we do not account for the discrepancy in absolute value. Our current formula is: time = cycles * mult (where mult is a function of the cpu-speed on variable tsc machines) Suppose our current cycle count is 10, and we have a multiplier of 5, then our time value would end up being 50. Now cpufreq comes along and changes the multiplier to say 3 or 7, which would result in our time being resp. 30 or 70. That means that we can observe random jumps in the time value due to frequency changes in both fwd and bwd direction. So what this patch does is change the formula to: time = cycles * frequency + offset And we calculate offset so that time_before == time_after, thereby ridding us of these jumps in time. [ Impact: fix/reduce sched_clock() jumps across frequency changing events ] Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar Chucked-on-by: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/timer.h | 6 +++++- arch/x86/kernel/tsc.c | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index bd37ed444a2..20ca9c4d468 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -45,12 +45,16 @@ extern int no_timer_check; */ DECLARE_PER_CPU(unsigned long, cyc2ns); +DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; + int cpu = smp_processor_id(); + unsigned long long ns = per_cpu(cyc2ns_offset, cpu); + ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; + return ns; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3e1c057e98f..ef4dac50143 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -589,22 +589,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); */ DEFINE_PER_CPU(unsigned long, cyc2ns); +DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - unsigned long long tsc_now, ns_now; + unsigned long long tsc_now, ns_now, *offset; unsigned long flags, *scale; local_irq_save(flags); sched_clock_idle_sleep_event(); scale = &per_cpu(cyc2ns, cpu); + offset = &per_cpu(cyc2ns_offset, cpu); rdtscll(tsc_now); ns_now = __cycles_2_ns(tsc_now); - if (cpu_khz) + if (cpu_khz) { *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); + } sched_clock_idle_wakeup_event(0); local_irq_restore(flags); -- cgit v1.2.3-70-g09d2 From 8fa62ad9d24e24707164ac7a97cbe59fe78a8591 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 17 Jun 2009 14:11:10 +0530 Subject: x86: msr.h linux/types.h is only required for __KERNEL__ is only required for __KERNEL__ as whole file is covered with it Also fixed some spacing issues for usr/include/asm-x86/msr.h Signed-off-by: Jaswinder Singh Rajput Cc: "H. Peter Anvin" LKML-Reference: <1245228070.2662.1.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 22603764e7d..48ad9d29484 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -3,13 +3,10 @@ #include -#ifndef __ASSEMBLY__ -# include -#endif - #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include #include #include #include @@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ - - #endif /* _ASM_X86_MSR_H */ -- cgit v1.2.3-70-g09d2 From bc3f5d3dbd576da94a575b1477b8e38551bf11da Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Thu, 18 Jun 2009 00:35:59 +0200 Subject: x86: de-assembler-ize asm/desc.h asm/desc.h is included in three assembly files, but the only macro it defines, GET_DESC_BASE, is never used. This patch removes the includes, removes the macro GET_DESC_BASE and the ASSEMBLY guard from asm/desc.h. Signed-off-by: Alexander van Heukelum Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/desc.h | 26 -------------------------- arch/x86/kernel/entry_32.S | 1 - arch/x86/kernel/head_32.S | 1 - arch/x86/kernel/head_64.S | 1 - 4 files changed, 29 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c45f415ce31..c993e9e0fed 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_DESC_H #define _ASM_X86_DESC_H -#ifndef __ASSEMBLY__ #include #include #include @@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); } -#else -/* - * GET_DESC_BASE reads the descriptor base of the specified segment. - * - * Args: - * idx - descriptor index - * gdt - GDT pointer - * base - 32bit register to which the base will be written - * lo_w - lo word of the "base" register - * lo_b - lo byte of the "base" register - * hi_b - hi byte of the low word of the "base" register - * - * Example: - * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) - * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. - */ -#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ - movb idx * 8 + 4(gdt), lo_b; \ - movb idx * 8 + 7(gdt), hi_b; \ - shll $16, base; \ - movw idx * 8 + 2(gdt), lo_w; - - -#endif /* __ASSEMBLY__ */ - #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 848f73f0954..9f8ce77dbc6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index dc5ed4bdd88..8663afb5653 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 54b29bb24e7..fa54f78e2a0 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-70-g09d2 From 7c095e4603dd6ce78ff5b9b70896fe3e05c13f5c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 17 Jun 2009 16:28:12 -0700 Subject: dma-mapping: x86: use asm-generic/dma-mapping-common.h Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Acked-by: Ingo Molnar Cc: "Luck, Tony" Cc: Arnd Bergmann Cc: James Bottomley Cc: "David S. Miller" Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-mapping.h | 173 +------------------------------------ 2 files changed, 3 insertions(+), 171 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cf42fc30541..73c0bda73fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,6 +28,7 @@ config X86 select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS + select HAVE_DMA_ATTRS select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index b93405b228b..1c3f9435f1c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -33,6 +33,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) #endif } +#include + /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -53,177 +55,6 @@ extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag); -static inline dma_addr_t -dma_map_single(struct device *hwdev, void *ptr, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - dma_addr_t addr; - - BUG_ON(!valid_dma_direction(dir)); - kmemcheck_mark_initialized(ptr, size); - addr = ops->map_page(hwdev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, - dir, NULL); - debug_dma_map_page(hwdev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, - dir, addr, true); - return addr; -} - -static inline void -dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, NULL); - debug_dma_unmap_page(dev, addr, size, dir, true); -} - -static inline int -dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - int ents; - struct scatterlist *s; - int i; - - BUG_ON(!valid_dma_direction(dir)); - for_each_sg(sg, s, nents, i) - kmemcheck_mark_initialized(sg_virt(s), s->length); - ents = ops->map_sg(hwdev, sg, nents, dir, NULL); - debug_dma_map_sg(hwdev, sg, nents, ents, dir); - - return ents; -} - -static inline void -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - debug_dma_unmap_sg(hwdev, sg, nents, dir); - if (ops->unmap_sg) - ops->unmap_sg(hwdev, sg, nents, dir, NULL); -} - -static inline void -dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); - debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir); - flush_write_buffers(); -} - -static inline void -dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) - ops->sync_single_for_device(hwdev, dma_handle, size, dir); - debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir); - flush_write_buffers(); -} - -static inline void -dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_range_for_cpu) - ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, dir); - debug_dma_sync_single_range_for_cpu(hwdev, dma_handle, - offset, size, dir); - flush_write_buffers(); -} - -static inline void -dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_range_for_device) - ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, dir); - debug_dma_sync_single_range_for_device(hwdev, dma_handle, - offset, size, dir); - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_cpu) - ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir); - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(hwdev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_device) - ops->sync_sg_for_device(hwdev, sg, nelems, dir); - debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir); - - flush_write_buffers(); -} - -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - BUG_ON(!valid_dma_direction(dir)); - kmemcheck_mark_initialized(page_address(page) + offset, size); - addr = ops->map_page(dev, page, offset, size, dir, NULL); - debug_dma_map_page(dev, page, offset, size, dir, addr, false); - - return addr; -} - -static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, NULL); - debug_dma_unmap_page(dev, addr, size, dir, false); -} - static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) -- cgit v1.2.3-70-g09d2 From 0c87197142427063e096f11603543ca874045952 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 11:35:01 +0200 Subject: perf_counter, x86: Improve interactions with fast-gup Improve a few details in perfcounter call-chain recording that makes use of fast-GUP: - Use ACCESS_ONCE() to observe the pte value. ptes are fundamentally racy and can be changed on another CPU, so we have to be careful about how we access them. The PAE branch is already careful with read-barriers - but the non-PAE and 64-bit side needs an ACCESS_ONCE() to make sure the pte value is observed only once. - make the checks a bit stricter so that we can feed it any kind of cra^H^H^H user-space input ;-) Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 7 ++++++- arch/x86/mm/gup.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5..512ee87062c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -25,7 +25,12 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) #define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#ifdef CONFIG_X86_32 +# define USER_DS MAKE_MM_SEG(PAGE_OFFSET) +#else +# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK) +#endif #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 697d5727c11..2d1d784ad3f 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -14,7 +14,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return *ptep; + return ACCESS_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking -- cgit v1.2.3-70-g09d2 From 9063c61fd5cbd6f42e95929aa0e02380c9e15656 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 20 Jun 2009 15:40:00 -0700 Subject: x86, 64-bit: Clean up user address masking The discussion about using "access_ok()" in get_user_pages_fast() (see commit 7f8189068726492950bf1a2dcfd9b51314560abf: "x86: don't use 'access_ok()' as a range check in get_user_pages_fast()" for details and end result), made us notice that x86-64 was really being very sloppy about virtual address checking. So be way more careful and straightforward about masking x86-64 virtual addresses: - All the VIRTUAL_MASK* variants now cover half of the address space, it's not like we can use the full mask on a signed integer, and the larger mask just invites mistakes when applying it to either half of the 48-bit address space. - /proc/kcore's kc_offset_to_vaddr() becomes a lot more obvious when it transforms a file offset into a (kernel-half) virtual address. - Unify/simplify the 32-bit and 64-bit USER_DS definition to be based on TASK_SIZE_MAX. This cleanup and more careful/obvious user virtual address checking also uncovered a buglet in the x86-64 implementation of strnlen_user(): it would do an "access_ok()" check on the whole potential area, even if the string itself was much shorter, and thus return an error even for valid strings. Our sloppy checking had hidden this. So this fixes 'strnlen_user()' to do this properly, the same way we already handled user strings in 'strncpy_from_user()'. Namely by just checking the first byte, and then relying on fault handling for the rest. That always works, since we impose a guard page that cannot be mapped at the end of the user space address space (and even if we didn't, we'd have the address space hole). Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: Nick Piggin Cc: Hugh Dickins Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Alan Cox Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_64_types.h | 2 +- arch/x86/include/asm/pgtable_64.h | 5 +---- arch/x86/include/asm/uaccess.h | 7 +------ arch/x86/lib/usercopy_64.c | 2 +- 4 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8d382d3abf3..7639dbf5d22 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -41,7 +41,7 @@ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 46 -#define __VIRTUAL_MASK_SHIFT 48 +#define __VIRTUAL_MASK_SHIFT 47 /* * Kernel image size is limited to 512 MB (see level2_kernel_pgt in diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index abde308fdb0..c57a3011714 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -165,10 +165,7 @@ extern void cleanup_highmap(void); /* fs/proc/kcore.c */ #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) -#define kc_offset_to_vaddr(o) \ - (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \ - ? ((o) | ~__VIRTUAL_MASK) \ - : (o)) +#define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 512ee87062c..20e6a795e16 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -25,12 +25,7 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) #define KERNEL_DS MAKE_MM_SEG(-1UL) - -#ifdef CONFIG_X86_32 -# define USER_DS MAKE_MM_SEG(PAGE_OFFSET) -#else -# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK) -#endif +#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index ec13cb5f17e..b7c2849ffb6 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(__strnlen_user); long strnlen_user(const char __user *s, long n) { - if (!access_ok(VERIFY_READ, s, n)) + if (!access_ok(VERIFY_READ, s, 1)) return 0; return __strnlen_user(s, n); } -- cgit v1.2.3-70-g09d2 From e59a1bb2fdfb745c685f5b40ffbed126331d3223 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jun 2009 11:56:24 +0900 Subject: x86: fix pageattr handling for lpage percpu allocator and re-enable it lpage allocator aliases a PMD page for each cpu and returns whatever is unused to the page allocator. When the pageattr of the recycled pages are changed, this makes the two aliases point to the overlapping regions with different attributes which isn't allowed and known to cause subtle data corruption in certain cases. This can be handled in simliar manner to the x86_64 highmap alias. pageattr code should detect if the target pages have PMD alias and split the PMD alias and synchronize the attributes. pcpur allocator is updated to keep the allocated PMD pages map sorted in ascending address order and provide pcpu_lpage_remapped() function which binary searches the array to determine whether the given address is aliased and if so to which address. pageattr is updated to use pcpu_lpage_remapped() to detect the PMD alias and split it up as necessary from cpa_process_alias(). Jan Beulich spotted the original problem and incorrect usage of vaddr instead of laddr for lookup. With this, lpage percpu allocator should work correctly. Re-enable it. [ Impact: fix subtle lpage pageattr bug and re-enable lpage ] Signed-off-by: Tejun Heo Reported-by: Jan Beulich Cc: Andi Kleen Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 10 ++++++ arch/x86/kernel/setup_percpu.c | 72 ++++++++++++++++++++++++++++++++++++------ arch/x86/mm/pageattr.c | 21 +++++++++++- 3 files changed, 93 insertions(+), 10 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 02ecb30982a..103f1ddb0d8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -42,6 +42,7 @@ #else /* ...!ASSEMBLY */ +#include #include #ifdef CONFIG_SMP @@ -155,6 +156,15 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); +#ifdef CONFIG_NEED_MULTIPLE_NODES +void *pcpu_lpage_remapped(void *kaddr); +#else +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7d38941e2b8..bad2fd22311 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -142,8 +142,8 @@ struct pcpul_ent { void *ptr; }; -static size_t pcpul_size __initdata; -static struct pcpul_ent *pcpul_map __initdata; +static size_t pcpul_size; +static struct pcpul_ent *pcpul_map; static struct vm_struct pcpul_vm; static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) @@ -160,15 +160,14 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size) { size_t map_size, dyn_size; unsigned int cpu; + int i, j; ssize_t ret; /* * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. - * - * NOTE: disabled for now. */ - if (true || !cpu_has_pse || !pcpu_need_numa()) + if (!cpu_has_pse || !pcpu_need_numa()) return -EINVAL; /* @@ -231,16 +230,71 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size) ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, PERCPU_FIRST_CHUNK_RESERVE, dyn_size, PMD_SIZE, pcpul_vm.addr, NULL); - goto out_free_map; + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; enomem: for_each_possible_cpu(cpu) if (pcpul_map[cpu].ptr) free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - ret = -ENOMEM; -out_free_map: free_bootmem(__pa(pcpul_map), map_size); - return ret; + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu PMD + * mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used PMD + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); + unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < pmd_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > pmd_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * PMD_SIZE + offset; + } + } + + return NULL; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2ab058b0947..1b734d7a896 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -682,7 +683,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr; + unsigned long vaddr, remapped; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -737,6 +738,24 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif + /* + * If the PMD page was partially used for per-cpu remapping, + * the recycled area needs to be split and modified. Because + * the area is always proper subset of a PMD page + * cpa->numpages is guaranteed to be 1 for these areas, so + * there's no need to loop over and check for further remaps. + */ + remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); + if (remapped) { + WARN_ON(cpa->numpages > 1); + alias_cpa = *cpa; + alias_cpa.vaddr = &remapped; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; + } + return 0; } -- cgit v1.2.3-70-g09d2 From 236e946b53ffd5e2f5d7e6abebbe72a9f0826d15 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Jun 2009 16:23:03 -0700 Subject: Revert "PCI: use ACPI _CRS data by default" This reverts commit 9e9f46c44e487af0a82eb61b624553e2f7118f5b. Quoting from the commit message: "At this point, it seems to solve more problems than it causes, so let's try using it by default. It's an easy revert if it ends up causing trouble." And guess what? The _CRS code causes trouble. Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 2 +- arch/x86/include/asm/pci_x86.h | 2 +- arch/x86/pci/acpi.c | 2 +- arch/x86/pci/amd_bus.c | 2 +- arch/x86/pci/common.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 040fee60728..d08759aa090 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1855,7 +1855,7 @@ and is between 256 and 4096 characters. It is defined in the file IRQ routing is enabled. noacpi [X86] Do not use ACPI for IRQ routing or for PCI scanning. - nocrs [X86] Don't use _CRS for PCI resource + use_crs [X86] Use _CRS for PCI resource allocation. routeirq Do IRQ routing for all PCI devices. This is normally done in pci_enable_device(), diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index d419f5c0266..b399988eee3 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -25,7 +25,7 @@ #define PCI_BIOS_IRQ_SCAN 0x2000 #define PCI_ASSIGN_ALL_BUSSES 0x4000 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 -#define PCI_NO_ROOT_CRS 0x10000 +#define PCI_USE__CRS 0x10000 #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 #define PCI_HAS_IO_ECS 0x40000 #define PCI_NOASSIGN_ROMS 0x80000 diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 16c3fda85bb..b26626dc517 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -238,7 +238,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif } - if (bus && !(pci_probe & PCI_NO_ROOT_CRS)) + if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); return bus; } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 2255f880678..f893d6a6e80 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -101,7 +101,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) struct pci_root_info *info; /* don't go for it if _CRS is used */ - if (!(pci_probe & PCI_NO_ROOT_CRS)) + if (pci_probe & PCI_USE__CRS) return; /* if only one root bus, don't need to anything */ diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 4740119e4bb..2202b6257b8 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -515,8 +515,8 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; - } else if (!strcmp(str, "nocrs")) { - pci_probe |= PCI_NO_ROOT_CRS; + } else if (!strcmp(str, "use_crs")) { + pci_probe |= PCI_USE__CRS; return NULL; } else if (!strcmp(str, "earlydump")) { pci_early_dump_regs = 1; -- cgit v1.2.3-70-g09d2 From 22f4319d6bc0155e6c0ae560729baa6c09dc09e7 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 25 Jun 2009 16:20:48 -0400 Subject: x86, setup: Fix typo "CONFIG_x86_64" in CONFIG_X86_64 was misspelled (wrong case), which caused the x86-64 kernel to advertise itself as more relocatable than it really is. This could in theory cause boot failures once bootloaders start support the new relocation fields. Signed-off-by: Robert P. J. Day Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/boot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 418e632d4a8..6f02b9a5384 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -16,7 +16,7 @@ & ~(CONFIG_PHYSICAL_ALIGN - 1)) /* Minimum kernel alignment, as a power of two */ -#ifdef CONFIG_x86_64 +#ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT #else #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) -- cgit v1.2.3-70-g09d2 From 658dbfeb5e7ab35d440c665d643a6285e43fddcd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 25 Jun 2009 15:16:06 -0700 Subject: x86, setup: correct include file in needs , not in order to resolve PMD_SHIFT. Also, correct a +1 which really should be + THREAD_ORDER. This is a build error which was masked by a typoed #ifdef. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/boot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6f02b9a5384..7a1065958ba 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -8,7 +8,7 @@ #ifdef __KERNEL__ -#include +#include /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ @@ -19,7 +19,7 @@ #ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT #else -#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) #endif #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) -- cgit v1.2.3-70-g09d2