summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2006-06-26 13:56:40 +0200
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 10:48:15 -0700
commita32073bffc656ca4bde6002b6cf7c1a8e0e22712 (patch)
tree5ddcd3107eca8807685a19490c2c849d728a51a6 /arch
parent7c2d9cd218916276e52a5dae827b84a159fe5c96 (diff)
[PATCH] x86_64: Clean and enhance up K8 northbridge access code
- Factor out the duplicated access/cache code into a single file * Shared between i386/x86-64. - Share flush code between AGP and IOMMU * Fix a bug: AGP didn't wait for end of flush before - Drop 8 northbridges limit and allocate dynamically - Add lock to serialize AGP and IOMMU GART flushes - Add PCI ID for next AMD northbridge - Random related cleanups The old K8 NUMA discovery code is unchanged. New systems should all use SRAT for this. Cc: "Navin Boppuri" <navin.boppuri@newisys.com> Cc: Dave Jones <davej@redhat.com> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/aperture.c24
-rw-r--r--arch/x86_64/kernel/k8.c118
-rw-r--r--arch/x86_64/kernel/pci-gart.c93
-rw-r--r--arch/x86_64/pci/k8-bus.c10
8 files changed, 169 insertions, 89 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf..2206bf6637d 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1054,6 +1054,10 @@ config SCx200
This support is also available as a module. If compiled as a
module, it will be called scx200.
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64
+
source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af..28b14d6c7b1 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_K8_NB) += k8.o
EXTRA_AFLAGS := -traditional
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
$(call if_changed,syscall)
+
+k8-y += ../../x86_64/kernel/k8.o
+
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index af44130f0d6..fc75275d8c7 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -501,6 +501,10 @@ config REORDER
optimal TLB usage. If you have pretty much any version of binutils,
this can increase your kernel build time by roughly one minute.
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA)
+
endmenu
#
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 381bc6ad743..f927d11065f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
obj-$(CONFIG_X86_VSMP) += vsmp.o
+obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 70b9d21ed67..a7ad03ee98c 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -24,6 +24,7 @@
#include <asm/proto.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
+#include <asm/k8.h>
int iommu_aperture;
int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +38,6 @@ int fix_aperture __initdata = 1;
/* This code runs before the PCI subsystem is initialized, so just
access the northbridge directly. */
-#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
-
static u32 __init allocate_aperture(void)
{
pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +67,20 @@ static u32 __init allocate_aperture(void)
return (u32)__pa(p);
}
-static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
+static int __init aperture_valid(u64 aper_base, u32 aper_size)
{
if (!aper_base)
return 0;
if (aper_size < 64*1024*1024) {
- printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20);
+ printk("Aperture too small (%d MB)\n", aper_size>>20);
return 0;
}
if (aper_base + aper_size >= 0xffffffff) {
- printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
+ printk("Aperture beyond 4GB. Ignoring.\n");
return 0;
}
if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
- printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
+ printk("Aperture pointing to e820 RAM. Ignoring.\n");
return 0;
}
return 1;
@@ -140,7 +139,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
aper, 32 << *order, apsizereg);
- if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order))
+ if (!aperture_valid(aper, (32*1024*1024) << *order))
return 0;
return (u32)aper;
}
@@ -208,9 +207,8 @@ void __init iommu_hole_init(void)
fix = 0;
for (num = 24; num < 32; num++) {
- char name[30];
- if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
- continue;
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+ continue;
iommu_aperture = 1;
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
printk("CPU %d: aperture @ %Lx size %u MB\n", num-24,
aper_base, aper_size>>20);
- sprintf(name, "northbridge cpu %d", num-24);
-
- if (!aperture_valid(name, aper_base, aper_size)) {
+ if (!aperture_valid(aper_base, aper_size)) {
fix = 1;
break;
}
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
/* Fix up the north bridges */
for (num = 24; num < 32; num++) {
- if (read_pci_config(0, num, 3, 0x00) != NB_ID_3)
+ if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
continue;
/* Don't enable translation yet. That is done later.
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644
index 00000000000..6416682d33d
--- /dev/null
+++ b/arch/x86_64/kernel/k8.c
@@ -0,0 +1,118 @@
+/*
+ * Shared support code for AMD K8 northbridges and derivates.
+ * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
+ */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/k8.h>
+
+int num_k8_northbridges;
+EXPORT_SYMBOL(num_k8_northbridges);
+
+static u32 *flush_words;
+
+struct pci_device_id k8_nb_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
+};
+EXPORT_SYMBOL(k8_nb_ids);
+
+struct pci_dev **k8_northbridges;
+EXPORT_SYMBOL(k8_northbridges);
+
+static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+{
+ do {
+ dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+ if (!dev)
+ break;
+ } while (!pci_match_id(&k8_nb_ids[0], dev));
+ return dev;
+}
+
+int cache_k8_northbridges(void)
+{
+ int i;
+ struct pci_dev *dev;
+ if (num_k8_northbridges)
+ return 0;
+
+ num_k8_northbridges = 0;
+ dev = NULL;
+ while ((dev = next_k8_northbridge(dev)) != NULL)
+ num_k8_northbridges++;
+
+ k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!k8_northbridges)
+ return -ENOMEM;
+
+ flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
+ if (!flush_words) {
+ kfree(k8_northbridges);
+ return -ENOMEM;
+ }
+
+ dev = NULL;
+ i = 0;
+ while ((dev = next_k8_northbridge(dev)) != NULL) {
+ k8_northbridges[i++] = dev;
+ pci_read_config_dword(dev, 0x9c, &flush_words[i]);
+ }
+ k8_northbridges[i] = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+
+/* Ignores subdevice/subvendor but as far as I can figure out
+ they're useless anyways */
+int __init early_is_k8_nb(u32 device)
+{
+ struct pci_device_id *id;
+ u32 vendor = device & 0xffff;
+ device >>= 16;
+ for (id = k8_nb_ids; id->vendor; id++)
+ if (vendor == id->vendor && device == id->device)
+ return 1;
+ return 0;
+}
+
+void k8_flush_garts(void)
+{
+ int flushed, i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(gart_lock);
+
+ /* Avoid races between AGP and IOMMU. In theory it's not needed
+ but I'm not sure if the hardware won't lose flush requests
+ when another is pending. This whole thing is so expensive anyways
+ that it doesn't matter to serialize more. -AK */
+ spin_lock_irqsave(&gart_lock, flags);
+ flushed = 0;
+ for (i = 0; i < num_k8_northbridges; i++) {
+ pci_write_config_dword(k8_northbridges[i], 0x9c,
+ flush_words[i]|1);
+ flushed++;
+ }
+ for (i = 0; i < num_k8_northbridges; i++) {
+ u32 w;
+ /* Make sure the hardware actually executed the flush*/
+ for (;;) {
+ pci_read_config_dword(k8_northbridges[i],
+ 0x9c, &w);
+ if (!(w & 1))
+ break;
+ cpu_relax();
+ }
+ }
+ spin_unlock_irqrestore(&gart_lock, flags);
+ if (!flushed)
+ printk("nothing to flush?\n");
+}
+EXPORT_SYMBOL_GPL(k8_flush_garts);
+
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index ea8f4041794..ded3af3bcee 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -32,6 +32,7 @@
#include <asm/kdebug.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
+#include <asm/k8.h>
unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */
also seen with Qlogic at least). */
int iommu_fullflush = 1;
-#define MAX_NB 8
-
/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
#define to_pages(addr,size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-#define for_all_nb(dev) \
- dev = NULL; \
- while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
-
-static struct pci_dev *northbridges[MAX_NB];
-static u32 northbridge_flush_word[MAX_NB];
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
/*
* Use global flush state to avoid races with multiple flushers.
*/
-static void flush_gart(struct device *dev)
+static void flush_gart(void)
{
unsigned long flags;
- int flushed = 0;
- int i, max;
-
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- if (need_flush) {
- max = 0;
- for (i = 0; i < MAX_NB; i++) {
- if (!northbridges[i])
- continue;
- pci_write_config_dword(northbridges[i], 0x9c,
- northbridge_flush_word[i] | 1);
- flushed++;
- max = i;
- }
- for (i = 0; i <= max; i++) {
- u32 w;
- if (!northbridges[i])
- continue;
- /* Make sure the hardware actually executed the flush. */
- for (;;) {
- pci_read_config_dword(northbridges[i], 0x9c, &w);
- if (!(w & 1))
- break;
- cpu_relax();
- }
- }
- if (!flushed)
- printk("nothing to flush?\n");
+ if (need_flush) {
+ k8_flush_garts();
need_flush = 0;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
-
-
#ifdef CONFIG_IOMMU_LEAK
#define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
size_t size, int dir)
{
dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
- flush_gart(dev);
+ flush_gart();
return map;
}
@@ -351,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
s->dma_address = addr;
s->dma_length = s->length;
}
- flush_gart(dev);
+ flush_gart();
return nents;
}
@@ -458,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
goto error;
out++;
- flush_gart(dev);
+ flush_gart();
if (out < nents)
sg[out].dma_length = 0;
return out;
error:
- flush_gart(NULL);
+ flush_gart();
gart_unmap_sg(dev, sg, nents, dir);
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
void *gatt;
unsigned aper_base, new_aper_base;
unsigned aper_size, gatt_size, new_aper_size;
-
+ int i;
+
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
- for_all_nb(dev) {
+ dev = NULL;
+ for (i = 0; i < num_k8_northbridges; i++) {
+ dev = k8_northbridges[i];
new_aper_base = read_aperture(dev, &new_aper_size);
if (!new_aper_base)
goto nommu;
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
panic("Cannot allocate GATT table");
memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
-
- for_all_nb(dev) {
+
+ for (i = 0; i < num_k8_northbridges; i++) {
u32 ctl;
u32 gatt_reg;
+ dev = k8_northbridges[i];
gatt_reg = __pa(gatt) >> 12;
gatt_reg <<= 4;
pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
pci_write_config_dword(dev, 0x90, ctl);
}
- flush_gart(NULL);
+ flush_gart();
printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10);
return 0;
@@ -607,10 +576,14 @@ static int __init pci_iommu_init(void)
struct agp_kern_info info;
unsigned long aper_size;
unsigned long iommu_start;
- struct pci_dev *dev;
unsigned long scratch;
long i;
+ if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
+ printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+ return -1;
+ }
+
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
#else
@@ -637,14 +610,6 @@ static int __init pci_iommu_init(void)
return -1;
}
- i = 0;
- for_all_nb(dev)
- i++;
- if (i > MAX_NB) {
- printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
- return -1;
- }
-
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
aper_size = info.aper_size * 1024 * 1024;
iommu_size = check_iommu_size(info.aper_base, aper_size);
@@ -707,20 +672,8 @@ static int __init pci_iommu_init(void)
for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
iommu_gatt_base[i] = gart_unmapped_entry;
- for_all_nb(dev) {
- u32 flag;
- int cpu = PCI_SLOT(dev->devfn) - 24;
- if (cpu >= MAX_NB)
- continue;
- northbridges[cpu] = dev;
- pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
- northbridge_flush_word[cpu] = flag;
- }
-
- flush_gart(NULL);
-
+ flush_gart();
dma_ops = &gart_dma_ops;
-
return 0;
}
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a..b50a7c7c47f 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -2,6 +2,7 @@
#include <linux/pci.h>
#include <asm/mpspec.h>
#include <linux/cpumask.h>
+#include <asm/k8.h>
/*
* This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
#define NR_LDT_BUS_NUMBER_REGISTERS 3
#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
-#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
/**
* fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
__init static int
fill_mp_bus_to_cpumask(void)
{
- struct pci_dev *nb_dev = NULL;
- int i, j;
+ int i, j, k;
u32 ldtbus, nid;
static int lbnr[3] = {
LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
LDT_BUS_NUMBER_REGISTER_2
};
- while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
- PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) {
+ cache_k8_northbridges();
+ for (k = 0; k < num_k8_northbridges; k++) {
+ struct pci_dev *nb_dev = k8_northbridges[k];
pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {