diff options
-rw-r--r-- | Documentation/ia64/aliasing.txt | 208 | ||||
-rw-r--r-- | arch/ia64/Kconfig | 2 | ||||
-rw-r--r-- | arch/ia64/Makefile | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/asm-offsets.c | 16 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 156 | ||||
-rw-r--r-- | arch/ia64/kernel/efi_stub.S | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.h | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_asm.S | 28 | ||||
-rw-r--r-- | arch/ia64/kernel/sal.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 1 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 27 | ||||
-rw-r--r-- | arch/ia64/pci/pci.c | 17 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn_hwperf.c | 50 | ||||
-rw-r--r-- | arch/ia64/sn/pci/tioce_provider.c | 4 | ||||
-rw-r--r-- | include/asm-ia64/io.h | 1 | ||||
-rw-r--r-- | include/asm-ia64/mca.h | 9 | ||||
-rw-r--r-- | include/asm-ia64/pgtable.h | 22 | ||||
-rw-r--r-- | include/asm-ia64/sn/sn_sal.h | 2 | ||||
-rw-r--r-- | include/linux/efi.h | 1 |
19 files changed, 432 insertions, 123 deletions
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt new file mode 100644 index 00000000000..38f9a52d182 --- /dev/null +++ b/Documentation/ia64/aliasing.txt @@ -0,0 +1,208 @@ + MEMORY ATTRIBUTE ALIASING ON IA-64 + + Bjorn Helgaas + <bjorn.helgaas@hp.com> + May 4, 2006 + + +MEMORY ATTRIBUTES + + Itanium supports several attributes for virtual memory references. + The attribute is part of the virtual translation, i.e., it is + contained in the TLB entry. The ones of most interest to the Linux + kernel are: + + WB Write-back (cacheable) + UC Uncacheable + WC Write-coalescing + + System memory typically uses the WB attribute. The UC attribute is + used for memory-mapped I/O devices. The WC attribute is uncacheable + like UC is, but writes may be delayed and combined to increase + performance for things like frame buffers. + + The Itanium architecture requires that we avoid accessing the same + page with both a cacheable mapping and an uncacheable mapping[1]. + + The design of the chipset determines which attributes are supported + on which regions of the address space. For example, some chipsets + support either WB or UC access to main memory, while others support + only WB access. + +MEMORY MAP + + Platform firmware describes the physical memory map and the + supported attributes for each region. At boot-time, the kernel uses + the EFI GetMemoryMap() interface. ACPI can also describe memory + devices and the attributes they support, but Linux/ia64 currently + doesn't use this information. + + The kernel uses the efi_memmap table returned from GetMemoryMap() to + learn the attributes supported by each region of physical address + space. Unfortunately, this table does not completely describe the + address space because some machines omit some or all of the MMIO + regions from the map. + + The kernel maintains another table, kern_memmap, which describes the + memory Linux is actually using and the attribute for each region. + This contains only system memory; it does not contain MMIO space. + + The kern_memmap table typically contains only a subset of the system + memory described by the efi_memmap. Linux/ia64 can't use all memory + in the system because of constraints imposed by the identity mapping + scheme. + + The efi_memmap table is preserved unmodified because the original + boot-time information is required for kexec. + +KERNEL IDENTITY MAPPINGS + + Linux/ia64 identity mappings are done with large pages, currently + either 16MB or 64MB, referred to as "granules." Cacheable mappings + are speculative[2], so the processor can read any location in the + page at any time, independent of the programmer's intentions. This + means that to avoid attribute aliasing, Linux can create a cacheable + identity mapping only when the entire granule supports cacheable + access. + + Therefore, kern_memmap contains only full granule-sized regions that + can referenced safely by an identity mapping. + + Uncacheable mappings are not speculative, so the processor will + generate UC accesses only to locations explicitly referenced by + software. This allows UC identity mappings to cover granules that + are only partially populated, or populated with a combination of UC + and WB regions. + +USER MAPPINGS + + User mappings are typically done with 16K or 64K pages. The smaller + page size allows more flexibility because only 16K or 64K has to be + homogeneous with respect to memory attributes. + +POTENTIAL ATTRIBUTE ALIASING CASES + + There are several ways the kernel creates new mappings: + + mmap of /dev/mem + + This uses remap_pfn_range(), which creates user mappings. These + mappings may be either WB or UC. If the region being mapped + happens to be in kern_memmap, meaning that it may also be mapped + by a kernel identity mapping, the user mapping must use the same + attribute as the kernel mapping. + + If the region is not in kern_memmap, the user mapping should use + an attribute reported as being supported in the EFI memory map. + + Since the EFI memory map does not describe MMIO on some + machines, this should use an uncacheable mapping as a fallback. + + mmap of /sys/class/pci_bus/.../legacy_mem + + This is very similar to mmap of /dev/mem, except that legacy_mem + only allows mmap of the one megabyte "legacy MMIO" area for a + specific PCI bus. Typically this is the first megabyte of + physical address space, but it may be different on machines with + several VGA devices. + + "X" uses this to access VGA frame buffers. Using legacy_mem + rather than /dev/mem allows multiple instances of X to talk to + different VGA cards. + + The /dev/mem mmap constraints apply. + + However, since this is for mapping legacy MMIO space, WB access + does not make sense. This matters on machines without legacy + VGA support: these machines may have WB memory for the entire + first megabyte (or even the entire first granule). + + On these machines, we could mmap legacy_mem as WB, which would + be safe in terms of attribute aliasing, but X has no way of + knowing that it is accessing regular memory, not a frame buffer, + so the kernel should fail the mmap rather than doing it with WB. + + read/write of /dev/mem + + This uses copy_from_user(), which implicitly uses a kernel + identity mapping. This is obviously safe for things in + kern_memmap. + + There may be corner cases of things that are not in kern_memmap, + but could be accessed this way. For example, registers in MMIO + space are not in kern_memmap, but could be accessed with a UC + mapping. This would not cause attribute aliasing. But + registers typically can be accessed only with four-byte or + eight-byte accesses, and the copy_from_user() path doesn't allow + any control over the access size, so this would be dangerous. + + ioremap() + + This returns a kernel identity mapping for use inside the + kernel. + + If the region is in kern_memmap, we should use the attribute + specified there. Otherwise, if the EFI memory map reports that + the entire granule supports WB, we should use that (granules + that are partially reserved or occupied by firmware do not appear + in kern_memmap). Otherwise, we should use a UC mapping. + +PAST PROBLEM CASES + + mmap of various MMIO regions from /dev/mem by "X" on Intel platforms + + The EFI memory map may not report these MMIO regions. + + These must be allowed so that X will work. This means that + when the EFI memory map is incomplete, every /dev/mem mmap must + succeed. It may create either WB or UC user mappings, depending + on whether the region is in kern_memmap or the EFI memory map. + + mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled + + See https://bugzilla.novell.com/show_bug.cgi?id=140858. + + The EFI memory map reports the following attributes: + 0x00000-0x9FFFF WB only + 0xA0000-0xBFFFF UC only (VGA frame buffer) + 0xC0000-0xFFFFF WB only + + This mmap is done with user pages, not kernel identity mappings, + so it is safe to use WB mappings. + + The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, + which will use a granule-sized UC mapping covering 0-0xFFFFF. This + granule covers some WB-only memory, but since UC is non-speculative, + the processor will never generate an uncacheable reference to the + WB-only areas unless the driver explicitly touches them. + + mmap of 0x0-0xFFFFF legacy_mem by "X" + + If the EFI memory map reports this entire range as WB, there + is no VGA MMIO hole, and the mmap should fail or be done with + a WB mapping. + + There's no easy way for X to determine whether the 0xA0000-0xBFFFF + region is a frame buffer or just memory, so I think it's best to + just fail this mmap request rather than using a WB mapping. As + far as I know, there's no need to map legacy_mem with WB + mappings. + + Otherwise, a UC mapping of the entire region is probably safe. + The VGA hole means the region will not be in kern_memmap. The + HP sx1000 chipset doesn't support UC access to the memory surrounding + the VGA hole, but X doesn't need that area anyway and should not + reference it. + + mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled + + The EFI memory map reports the following attributes: + 0x00000-0xFFFFF WB only (no VGA MMIO hole) + + This is a special case of the previous case, and the mmap should + fail for the same reason as above. + +NOTES + + [1] SDM rev 2.2, vol 2, sec 4.4.1. + [2] SDM rev 2.2, vol 2, sec 4.4.6. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fbb25b00629..18318749884 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -449,6 +449,8 @@ config PCI_DOMAINS bool default PCI +source "drivers/pci/pcie/Kconfig" + source "drivers/pci/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 80ea7506fa1..21033ed8330 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -71,6 +71,8 @@ all: compressed unwcheck compressed: vmlinux.gz +vmlinuz: vmlinux.gz + vmlinux.gz: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 77225659e96..16e7b6600ae 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -217,16 +217,24 @@ void foo(void) DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, offsetof (struct ia64_mca_cpu, init_stack)); BLANK(); - DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET, - offsetof (struct ia64_sal_os_state, sal_ra)); DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, offsetof (struct ia64_sal_os_state, os_gp)); - DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, - offsetof (struct ia64_sal_os_state, pal_min_state)); DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, offsetof (struct ia64_sal_os_state, proc_state_param)); + DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET, + offsetof (struct ia64_sal_os_state, sal_ra)); + DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET, + offsetof (struct ia64_sal_os_state, sal_gp)); + DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, + offsetof (struct ia64_sal_os_state, pal_min_state)); + DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET, + offsetof (struct ia64_sal_os_state, os_status)); + DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET, + offsetof (struct ia64_sal_os_state, context)); DEFINE(IA64_SAL_OS_STATE_SIZE, sizeof (struct ia64_sal_os_state)); + BLANK(); + DEFINE(IA64_PMSA_GR_OFFSET, offsetof (struct pal_min_state_area_s, pmsa_gr)); DEFINE(IA64_PMSA_BANK1_GR_OFFSET, diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 12cfedce73b..c33d0ba7e30 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -8,6 +8,8 @@ * Copyright (C) 1999-2003 Hewlett-Packard Co. * David Mosberger-Tang <davidm@hpl.hp.com> * Stephane Eranian <eranian@hpl.hp.com> + * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas <bjorn.helgaas@hp.com> * * All EFI Runtime Services are not implemented yet as EFI only * supports physical mode addressing on SoftSDV. This is to be fixed @@ -622,28 +624,20 @@ efi_get_iobase (void) return 0; } -static efi_memory_desc_t * -efi_memory_descriptor (unsigned long phys_addr) +static struct kern_memdesc * +kern_memory_descriptor (unsigned long phys_addr) { - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; + struct kern_memdesc *md; - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - - if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + for (md = kern_memmap; md->start != ~0UL; md++) { + if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) return md; } return 0; } -static int -efi_memmap_has_mmio (void) +static efi_memory_desc_t * +efi_memory_descriptor (unsigned long phys_addr) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -656,8 +650,8 @@ efi_memmap_has_mmio (void) for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (md->type == EFI_MEMORY_MAPPED_IO) - return 1; + if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) + return md; } return 0; } @@ -683,71 +677,125 @@ efi_mem_attributes (unsigned long phys_addr) } EXPORT_SYMBOL(efi_mem_attributes); -/* - * Determines whether the memory at phys_addr supports the desired - * attribute (WB, UC, etc). If this returns 1, the caller can safely - * access size bytes at phys_addr with the specified attribute. - */ -int -efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr) +u64 +efi_mem_attribute (unsigned long phys_addr, unsigned long size) { unsigned long end = phys_addr + size; efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + u64 attr; + + if (!md) + return 0; + + /* + * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells + * the kernel that firmware needs this region mapped. + */ + attr = md->attribute & ~EFI_MEMORY_RUNTIME; + do { + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) + return attr; + + md = efi_memory_descriptor(md_end); + if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) + return 0; + } while (md); + return 0; +} + +u64 +kern_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + struct kern_memdesc *md; + u64 attr; /* - * Some firmware doesn't report MMIO regions in the EFI memory - * map. The Intel BigSur (a.k.a. HP i2000) has this problem. - * On those platforms, we have to assume UC is valid everywhere. + * This is a hack for ioremap calls before we set up kern_memmap. + * Maybe we should do efi_memmap_init() earlier instead. */ - if (!md || (md->attribute & attr) != attr) { - if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio()) - return 1; + if (!kern_memmap) { + attr = efi_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return EFI_MEMORY_WB; return 0; } + md = kern_memory_descriptor(phys_addr); + if (!md) + return 0; + + attr = md->attribute; do { - unsigned long md_end = efi_md_end(md); + unsigned long md_end = kmd_end(md); if (end <= md_end) - return 1; + return attr; - md = efi_memory_descriptor(md_end); - if (!md || (md->attribute & attr) != attr) + md = kern_memory_descriptor(md_end); + if (!md || md->attribute != attr) return 0; } while (md); return 0; } +EXPORT_SYMBOL(kern_mem_attribute); -/* - * For /dev/mem, we only allow read & write system calls to access - * write-back memory, because read & write don't allow the user to - * control access size. - */ int valid_phys_addr_range (unsigned long phys_addr, unsigned long size) { - return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); + u64 attr; + + /* + * /dev/mem reads and writes use copy_to_user(), which implicitly + * uses a granule-sized kernel identity mapping. It's really + * only safe to do this for regions in kern_memmap. For more + * details, see Documentation/ia64/aliasing.txt. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + return 0; } -/* - * We allow mmap of anything in the EFI memory map that supports - * either write-back or uncacheable access. For uncacheable regions, - * the supported access sizes are system-dependent, and the user is - * responsible for using the correct size. - * - * Note that this doesn't currently allow access to hot-added memory, - * because that doesn't appear in the boot-time EFI memory map. - */ int valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) { - if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) - return 1; + /* + * MMIO regions are often missing from the EFI memory map. + * We must allow mmap of them for programs like X, so we + * currently can't do any useful validation. + */ + return 1; +} - if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) - return 1; +pgprot_t +phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, + pgprot_t vma_prot) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; - return 0; + /* + * For /dev/mem mmap, we use user mappings, but if the region is + * in kern_memmap (and hence may be covered by a kernel mapping), + * we must use the same attribute as the kernel mapping. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + else if (attr & EFI_MEMORY_UC) + return pgprot_noncached(vma_prot); + + /* + * Some chipsets don't support UC access to memory. If + * WB is supported, we prefer that. + */ + if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + + return pgprot_noncached(vma_prot); } int __init diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S index 5a7fe70212a..a56e161d751 100644 --- a/arch/ia64/kernel/efi_stub.S +++ b/arch/ia64/kernel/efi_stub.S @@ -61,7 +61,7 @@ GLOBAL_ENTRY(efi_call_phys) or loc3=loc3,r17 mov b6=r2 ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared br.call.sptk.many rp=ia64_switch_mode_phys .ret0: mov out4=in5 mov out0=in1 diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h index 78eeb079341..ebc3dfb8882 100644 --- a/arch/ia64/kernel/entry.h +++ b/arch/ia64/kernel/entry.h @@ -23,6 +23,7 @@ #define PT(f) (IA64_PT_REGS_##f##_OFFSET) #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) +#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET) #define PT_REGS_SAVES(off) \ .unwabi 3, 'i'; \ diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 6dff024cd62..c1bd1feffab 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -159,7 +159,7 @@ ia64_os_mca_spin: GET_IA64_MCA_DATA(r2) // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param ;; - add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2 + add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 ;; ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. ;; @@ -479,9 +479,11 @@ ia64_state_save: st8 [temp2]=r11,16 // rv_rc mov r11=cr.iipa ;; - st8 [temp1]=r18,16 // proc_state_param - st8 [temp2]=r19,16 // monarch + st8 [temp1]=r18 // proc_state_param + st8 [temp2]=r19 // monarch mov r6=IA64_KR(CURRENT) + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs ;; st8 [temp1]=r12,16 // sal_ra st8 [temp2]=r10,16 // sal_gp @@ -503,12 +505,14 @@ ia64_state_save: st8 [temp2]=r11,16 // cr.iipa mov r12=cr.iim ;; - st8 [temp1]=r12,16 // cr.iim + st8 [temp1]=r12 // cr.iim (p1) mov r12=IA64_MCA_COLD_BOOT (p2) mov r12=IA64_INIT_WARM_BOOT mov r6=cr.iha + add temp1=SOS(OS_STATUS), regs ;; - st8 [temp2]=r6,16 // cr.iha + st8 [temp2]=r6 // cr.iha + add temp2=SOS(CONTEXT), regs st8 [temp1]=r12 // os_status, default is cold boot mov r6=IA64_MCA_SAME_CONTEXT ;; @@ -820,8 +824,8 @@ ia64_state_restore: // Restore the SAL to OS state. The previous code left regs at pt_regs. add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs ;; - add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs - add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs ;; ld8 r12=[temp1],16 // sal_ra ld8 r9=[temp2],16 // sal_gp @@ -842,8 +846,10 @@ ia64_state_restore: ;; mov cr.itir=temp3 mov cr.iipa=temp4 - ld8 temp3=[temp1],16 // cr.iim - ld8 temp4=[temp2],16 // cr.iha + ld8 temp3=[temp1] // cr.iim + ld8 temp4=[temp2] // cr.iha + add temp1=SOS(OS_STATUS), regs + add temp2=SOS(CONTEXT), regs ;; mov cr.iim=temp3 mov cr.iha=temp4 @@ -916,7 +922,7 @@ ia64_state_restore: ia64_new_stack: add regs=MCA_PT_REGS_OFFSET, r3 - add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3 + add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 mov b0=r2 // save return address GET_IA64_MCA_DATA(temp1) invala @@ -1020,7 +1026,7 @@ ia64_old_stack: ia64_set_kernel_registers: add temp3=MCA_SP_OFFSET, r3 - add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3 + add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3 mov b0=r2 // save return address GET_IA64_MCA_DATA(temp1) ;; diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 056f7a6eedc..77fa65903d9 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -227,7 +227,7 @@ static int sal_cache_flush_drops_interrupts; static void __init check_sal_cache_flush (void) { - unsigned long flags, itv; + unsigned long flags; int cpu; u64 vector; @@ -238,9 +238,6 @@ check_sal_cache_flush (void) * Schedule a timer interrupt, wait until it's reported, and see if * SAL_CACHE_FLUSH drops it. */ - itv = ia64_get_itv(); - BUG_ON((itv & (1 << 16)) == 0); - ia64_set_itv(IA64_TIMER_VECTOR); ia64_set_itm(ia64_get_itc() + 1000); @@ -260,7 +257,6 @@ check_sal_cache_flush (void) ia64_eoi(); } - ia64_set_itv(itv); local_irq_restore(flags); put_cpu(); } diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index e4dfda1eb7d..6dba2d63f24 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -260,6 +260,7 @@ reserve_memory (void) n++; num_rsvd_regions = n; + BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); sort_regions(rsvd_region, num_rsvd_regions); } diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 643ccc6960c..07bd02b6c37 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/efi.h> #include <asm/io.h> +#include <asm/meminit.h> static inline void __iomem * __ioremap (unsigned long offset, unsigned long size) @@ -21,16 +22,29 @@ __ioremap (unsigned long offset, unsigned long size) void __iomem * ioremap (unsigned long offset, unsigned long size) { - if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB)) - return phys_to_virt(offset); + u64 attr; + unsigned long gran_base, gran_size; - if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC)) + /* + * For things in kern_memmap, we must use the same attribute + * as the rest of the kernel. For more details, see + * Documentation/ia64/aliasing.txt. + */ + attr = kern_mem_attribute(offset, size); + if (attr & EFI_MEMORY_WB) + return phys_to_virt(offset); + else if (attr & EFI_MEMORY_UC) return __ioremap(offset, size); /* - * Someday this should check ACPI resources so we - * can do the right thing for hot-plugged regions. + * Some chipsets don't support UC access to memory. If + * WB is supported for the whole granule, we prefer that. */ + gran_base = GRANULEROUNDDOWN(offset); + gran_size = GRANULEROUNDUP(offset + size) - gran_base; + if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) + return phys_to_virt(offset); + return __ioremap(offset, size); } EXPORT_SYMBOL(ioremap); @@ -38,6 +52,9 @@ EXPORT_SYMBOL(ioremap); void __iomem * ioremap_nocache (unsigned long offset, unsigned long size) { + if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) + return 0; + return __ioremap(offset, size); } EXPORT_SYMBOL(ioremap_nocache); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index cf7751b99d1..61dd8608da4 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -645,18 +645,31 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus) int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) { + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; char *addr; + /* + * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt + * for more details. + */ + if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size)) + return -EINVAL; + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) + return -EINVAL; + addr = pci_get_legacy_mem(bus); if (IS_ERR(addr)) return PTR_ERR(addr); vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = prot; vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) + size, vma->vm_page_prot)) return -EAGAIN; return 0; diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 739c948dc50..9a8a29339d2 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -51,6 +51,8 @@ static nasid_t sn_hwperf_master_nasid = INVALID_NASID; static int sn_hwperf_init(void); static DECLARE_MUTEX(sn_hwperf_init_mutex); +#define cnode_possible(n) ((n) < num_cnodes) + static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) { int e; @@ -127,14 +129,14 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return node_possible(cnode) ? cnode : -1; + return cnode_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) BUG(); - if (!obj->sn_hwp_this_part) + if (SN_HWPERF_FOREIGN(obj)) return -1; return sn_hwperf_geoid_to_cnode(obj->location); } @@ -199,12 +201,12 @@ static void print_pci_topology(struct seq_file *s) static inline int sn_hwperf_has_cpus(cnodeid_t node) { - return node_online(node) && nr_cpus_node(node); + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); } static inline int sn_hwperf_has_mem(cnodeid_t node) { - return node_online(node) && NODE_DATA(node)->node_present_pages; + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; } static struct sn_hwperf_object_info * @@ -237,7 +239,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb int found_mem = 0; int found_cpu = 0; - if (!node_possible(node)) + if (!cnode_possible(node)) return -EINVAL; if (sn_hwperf_has_cpus(node)) { @@ -442,7 +444,7 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, obj->sn_hwp_this_part ? "local" : "shared", obj->name); - if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) seq_putc(s, '\n'); else { cnodeid_t near_mem = -1; @@ -468,22 +470,24 @@ static int sn_topology_show(struct seq_file *s, void *d) /* * CPUs on this node, if any */ - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( - cpu_to_node(i), - cpu_to_node(j))); + if (!SN_HWPERF_IS_IONODE(obj)) { + cpumask = node_to_cpumask(ordinal); + for_each_online_cpu(i) { + if (cpu_isset(i, cpumask)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); } - seq_putc(s, '\n'); } } } @@ -523,7 +527,7 @@ static int sn_topology_show(struct seq_file *s, void *d) if (obj->sn_hwp_this_part && p->sn_hwp_this_part) /* both ends local to this partition */ seq_puts(s, " local"); - else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part) + else if (SN_HWPERF_FOREIGN(p)) /* both ends of the link in foreign partiton */ seq_puts(s, " foreign"); else @@ -776,7 +780,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || !node_possible(node)) { + (node = a.arg) < 0 || !cnode_possible(node)) { r = -EINVAL; goto error; } diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 4cac7bdc7c7..2d7948567eb 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 2003-2006 Silicon Graphics, Inc. All Rights Reserved. */ #include <linux/types.h> @@ -1023,7 +1023,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, ~0ULL); - tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL); if (request_irq(SGI_PCIASIC_ERROR, tioce_error_intr_handler, diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index c2e3742108b..781ee2c7e8c 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -88,6 +88,7 @@ phys_to_virt (unsigned long address) } #define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size); extern int valid_phys_addr_range (unsigned long addr, size_t count); /* efi.c */ extern int valid_mmap_phys_addr_range (unsigned long addr, size_t count); diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index 9c5389b7e62..ee97f7c2d46 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h @@ -69,14 +69,16 @@ typedef struct ia64_mc_info_s { */ struct ia64_sal_os_state { - /* SAL to OS, must be at offset 0 */ + + /* SAL to OS */ u64 os_gp; /* GP of the os registered with the SAL, physical */ u64 pal_proc; /* PAL_PROC entry point, physical */ u64 sal_proc; /* SAL_PROC entry point, physical */ u64 rv_rc; /* MCA - Rendezvous state, INIT - reason code */ u64 proc_state_param; /* from R18 */ u64 monarch; /* 1 for a monarch event, 0 for a slave */ - /* common, must follow SAL to OS */ + + /* common */ u64 sal_ra; /* Return address in SAL, physical */ u64 sal_gp; /* GP of the SAL - physical */ pal_min_state_area_t *pal_min_state; /* from R17. physical in asm, virtual in C */ @@ -98,7 +100,8 @@ struct ia64_sal_os_state { u64 iipa; u64 iim; u64 iha; - /* OS to SAL, must follow common */ + + /* OS to SAL */ u64 os_status; /* OS status to SAL, enum below */ u64 context; /* 0 if return to same context 1 if return to new context */ diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index eaac08d5e0b..228981cadf8 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -316,22 +316,20 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_mkhuge(pte) (__pte(pte_val(pte))) /* - * Macro to a page protection value as "uncacheable". Note that "protection" is really a - * misnomer here as the protection value contains the memory attribute bits, dirty bits, - * and various other bits as well. + * Make page protection values cacheable, uncacheable, or write- + * combining. Note that "protection" is really a misnomer here as the + * protection value contains the memory attribute bits, dirty bits, and + * various other bits as well. */ +#define pgprot_cacheable(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB) #define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) - -/* - * Macro to make mark a page protection value as "write-combining". - * Note that "protection" is really a misnomer here as the protection - * value contains the memory attribute bits, dirty bits, and various - * other bits as well. Accesses through a write-combining translation - * works bypasses the caches, but does allow for consecutive writes to - * be combined into single (but larger) write transactions. - */ #define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + static inline unsigned long pgd_index (unsigned long address) { diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 8c865e43f60..cd490b20d59 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -345,7 +345,7 @@ ia64_sn_plat_set_error_handling_features(void) ret_stuff.v1 = 0; ret_stuff.v2 = 0; SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES, - (SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV | SAL_ERR_FEAT_LOG_SBES), + SAL_ERR_FEAT_LOG_SBES, 0, 0, 0, 0, 0, 0); return ret_stuff.status; diff --git a/include/linux/efi.h b/include/linux/efi.h index e203613d3ae..66d621dbcb6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -294,6 +294,7 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); +extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr); extern int __init efi_uart_console_only (void); |