From 184cd4a3b962a4769889615430eaf40076b97969 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2011 17:29:08 +0000 Subject: powerpc/powernv: PCI support for p7IOC under OPAL v2 This adds support for p7IOC (and possibly other IODA v1 IO Hubs) using OPAL v2 interfaces. We completely take over resource assignment and assign them using an algorithm that hands out device BARs in a way that makes them fit in individual segments of the M32 window of the bridge, which enables us to assign individual PEs to devices and functions. The current implementation gives out a PE per functions on PCIe, and a PE for the entire bridge for PCIe to PCI-X bridges. This can be adjusted / fine tuned later. We also setup DMA resources (32-bit only for now) and MSIs (both 32-bit and 64-bit MSI are supported). The DMA allocation tries to divide the available 256M segments of the 32-bit DMA address space "fairly" among PEs. This is done using a "weight" heuristic which assigns less value to things like OHCI USB controllers than, for example SCSI RAID controllers. This algorithm will probably want some fine tuning for specific devices or device types. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/pci.h | 84 ++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'arch/powerpc/platforms/powernv/pci.h') diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d4dbc495093..28ae4ca512c 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -9,6 +9,50 @@ enum pnv_phb_type { PNV_PHB_IODA2, }; +/* Data associated with a PE, including IOMMU tracking etc.. */ +struct pnv_ioda_pe { + /* A PE can be associated with a single device or an + * entire bus (& children). In the former case, pdev + * is populated, in the later case, pbus is. + */ + struct pci_dev *pdev; + struct pci_bus *pbus; + + /* Effective RID (device RID for a device PE and base bus + * RID with devfn 0 for a bus PE) + */ + unsigned int rid; + + /* PE number */ + unsigned int pe_number; + + /* "Weight" assigned to the PE for the sake of DMA resource + * allocations + */ + unsigned int dma_weight; + + /* This is a PCI-E -> PCI-X bridge, this points to the + * corresponding bus PE + */ + struct pnv_ioda_pe *bus_pe; + + /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ + int tce32_seg; + int tce32_segcount; + struct iommu_table tce32_table; + + /* XXX TODO: Add support for additional 64-bit iommus */ + + /* MSIs. MVE index is identical for for 32 and 64 bit MSI + * and -1 if not supported. (It's actually identical to the + * PE number) + */ + int mve_number; + + /* Link in list of PE#s */ + struct list_head link; +}; + struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; @@ -34,6 +78,45 @@ struct pnv_phb { struct { struct iommu_table iommu_table; } p5ioc2; + + struct { + /* Global bridge info */ + unsigned int total_pe; + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation bitmap */ + unsigned long *pe_alloc; + + /* M32 & IO segment maps */ + unsigned int *m32_segmap; + unsigned int *io_segmap; + struct pnv_ioda_pe *pe_array; + + /* Reverse map of PEs, will have to extend if + * we are to support more than 256 PEs, indexed + * bus { bus, devfn } + */ + unsigned char pe_rmap[0x10000]; + + /* 32-bit TCE tables allocation */ + unsigned long tce32_count; + + /* Total "weight" for the sake of DMA resources + * allocation + */ + unsigned int dma_weight; + unsigned int dma_pe_count; + + /* Sorted list of used PE's, sorted at + * boot for resource allocation purposes + */ + struct list_head pe_list; + } ioda; }; }; @@ -43,6 +126,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset); extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); +extern void pnv_pci_init_ioda_hub(struct device_node *np); #endif /* __POWERNV_PCI_H */ -- cgit v1.2.3-70-g09d2 From cee72d5bb48952f2e50acd2610d52ea82f7092c9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 29 Nov 2011 18:22:53 +0000 Subject: powerpc/powernv: Display diag data on p7ioc EEH errors Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/pci-ioda.c | 25 +++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 1 + arch/powerpc/platforms/powernv/pci.c | 117 +++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/pci.h | 16 ++++ 4 files changed, 140 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/platforms/powernv/pci.h') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 425c2b29794..f31162cfdaa 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ -#define DEBUG +#undef DEBUG #include #include @@ -467,14 +467,13 @@ static void __devinit pnv_ioda_update_resources(struct pci_bus *bus) struct pci_bus *cbus; struct pci_dev *cdev; unsigned int i; - u16 cmd; - /* Clear all device enables */ - list_for_each_entry(cdev, &bus->devices, bus_list) { - pci_read_config_word(cdev, PCI_COMMAND, &cmd); - cmd &= ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY|PCI_COMMAND_MASTER); - pci_write_config_word(cdev, PCI_COMMAND, cmd); - } + /* We used to clear all device enables here. However it looks like + * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers, + * and shoot fatal errors to the PHB which in turns fences itself + * and we can't recover from that ... yet. So for now, let's leave + * the enables as-is and hope for the best. + */ /* Check if bus resources fit in our IO or M32 range */ for (i = 0; bus->self && (i < 2); i++) { @@ -618,7 +617,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, struct pci_dn *pdn = pnv_ioda_get_pdn(parent); if (pdn && pdn->pe_number != IODA_INVALID_PE) { rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, 1); + pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); /* XXX What to do in case of error ? */ } parent = parent->bus->self; @@ -638,7 +637,7 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, pe->mve_number = -1; } else { rc = opal_pci_set_mve_enable(phb->opal_id, - pe->mve_number, 1); + pe->mve_number, OPAL_ENABLE_MVE); if (rc) { pe_err(pe, "OPAL error %ld enabling MVE %d\n", rc, pe->mve_number); @@ -1187,6 +1186,12 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) phb->opal_id = phb_id; phb->type = PNV_PHB_IODA1; + /* Detect specific models for error handling */ + if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) + phb->model = PNV_PHB_MODEL_P7IOC; + else + phb->model = PNV_PHB_MODEL_UNKNOWN; + /* We parse "ranges" now since we need to deduce the register base * from the IO base */ diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 4c80f7c77d5..264967770c3 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -137,6 +137,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, phb->hose->private_data = phb; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; + phb->model = PNV_PHB_MODEL_P5IOC2; phb->regs = of_iomap(np, 0); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index c0ed379498a..a70bc1e385e 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -144,6 +144,112 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) } #endif /* CONFIG_PCI_MSI */ +static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb) +{ + struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc; + int i; + + pr_info("PHB %d diagnostic data:\n", phb->hose->global_number); + + pr_info(" brdgCtl = 0x%08x\n", data->brdgCtl); + + pr_info(" portStatusReg = 0x%08x\n", data->portStatusReg); + pr_info(" rootCmplxStatus = 0x%08x\n", data->rootCmplxStatus); + pr_info(" busAgentStatus = 0x%08x\n", data->busAgentStatus); + + pr_info(" deviceStatus = 0x%08x\n", data->deviceStatus); + pr_info(" slotStatus = 0x%08x\n", data->slotStatus); + pr_info(" linkStatus = 0x%08x\n", data->linkStatus); + pr_info(" devCmdStatus = 0x%08x\n", data->devCmdStatus); + pr_info(" devSecStatus = 0x%08x\n", data->devSecStatus); + + pr_info(" rootErrorStatus = 0x%08x\n", data->rootErrorStatus); + pr_info(" uncorrErrorStatus = 0x%08x\n", data->uncorrErrorStatus); + pr_info(" corrErrorStatus = 0x%08x\n", data->corrErrorStatus); + pr_info(" tlpHdr1 = 0x%08x\n", data->tlpHdr1); + pr_info(" tlpHdr2 = 0x%08x\n", data->tlpHdr2); + pr_info(" tlpHdr3 = 0x%08x\n", data->tlpHdr3); + pr_info(" tlpHdr4 = 0x%08x\n", data->tlpHdr4); + pr_info(" sourceId = 0x%08x\n", data->sourceId); + + pr_info(" errorClass = 0x%016llx\n", data->errorClass); + pr_info(" correlator = 0x%016llx\n", data->correlator); + + pr_info(" p7iocPlssr = 0x%016llx\n", data->p7iocPlssr); + pr_info(" p7iocCsr = 0x%016llx\n", data->p7iocCsr); + pr_info(" lemFir = 0x%016llx\n", data->lemFir); + pr_info(" lemErrorMask = 0x%016llx\n", data->lemErrorMask); + pr_info(" lemWOF = 0x%016llx\n", data->lemWOF); + pr_info(" phbErrorStatus = 0x%016llx\n", data->phbErrorStatus); + pr_info(" phbFirstErrorStatus = 0x%016llx\n", data->phbFirstErrorStatus); + pr_info(" phbErrorLog0 = 0x%016llx\n", data->phbErrorLog0); + pr_info(" phbErrorLog1 = 0x%016llx\n", data->phbErrorLog1); + pr_info(" mmioErrorStatus = 0x%016llx\n", data->mmioErrorStatus); + pr_info(" mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus); + pr_info(" mmioErrorLog0 = 0x%016llx\n", data->mmioErrorLog0); + pr_info(" mmioErrorLog1 = 0x%016llx\n", data->mmioErrorLog1); + pr_info(" dma0ErrorStatus = 0x%016llx\n", data->dma0ErrorStatus); + pr_info(" dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus); + pr_info(" dma0ErrorLog0 = 0x%016llx\n", data->dma0ErrorLog0); + pr_info(" dma0ErrorLog1 = 0x%016llx\n", data->dma0ErrorLog1); + pr_info(" dma1ErrorStatus = 0x%016llx\n", data->dma1ErrorStatus); + pr_info(" dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus); + pr_info(" dma1ErrorLog0 = 0x%016llx\n", data->dma1ErrorLog0); + pr_info(" dma1ErrorLog1 = 0x%016llx\n", data->dma1ErrorLog1); + + for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + pr_info(" PE[%3d] PESTA = 0x%016llx\n", i, data->pestA[i]); + pr_info(" PESTB = 0x%016llx\n", data->pestB[i]); + } +} + +static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb) +{ + switch(phb->model) { + case PNV_PHB_MODEL_P7IOC: + pnv_pci_dump_p7ioc_diag_data(phb); + break; + default: + pr_warning("PCI %d: Can't decode this PHB diag data\n", + phb->hose->global_number); + } +} + +static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) +{ + unsigned long flags, rc; + int has_diag; + + spin_lock_irqsave(&phb->lock, flags); + + rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + has_diag = (rc == OPAL_SUCCESS); + + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warning("PCI %d: Failed to clear EEH freeze state" + " for PE#%d, err %ld\n", + phb->hose->global_number, pe_no, rc); + + /* For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag) + pnv_pci_dump_phb_diag_data(phb); + else + pr_warning("PCI %d: No diag data available\n", + phb->hose->global_number); + } + + spin_unlock_irqrestore(&phb->lock, flags); +} + static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, u32 bdfn) { @@ -165,15 +271,8 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, } cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", bdfn, pe_no, fstate); - if (fstate != 0) { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc) { - pr_warning("PCI %d: Failed to clear EEH freeze state" - " for PE#%d, err %lld\n", - phb->hose->global_number, pe_no, rc); - } - } + if (fstate != 0) + pnv_pci_handle_eeh_config(phb, pe_no); } static int pnv_pci_read_config(struct pci_bus *bus, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 28ae4ca512c..8bc47963464 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -9,6 +9,15 @@ enum pnv_phb_type { PNV_PHB_IODA2, }; +/* Precise PHB model for error management */ +enum pnv_phb_model { + PNV_PHB_MODEL_UNKNOWN, + PNV_PHB_MODEL_P5IOC2, + PNV_PHB_MODEL_P7IOC, +}; + +#define PNV_PCI_DIAG_BUF_SIZE 4096 + /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_ioda_pe { /* A PE can be associated with a single device or an @@ -56,6 +65,7 @@ struct pnv_ioda_pe { struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; + enum pnv_phb_model model; u64 opal_id; void __iomem *regs; spinlock_t lock; @@ -118,6 +128,12 @@ struct pnv_phb { struct list_head pe_list; } ioda; }; + + /* PHB status structure */ + union { + unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; + struct OpalIoP7IOCPhbErrorData p7ioc; + } diag; }; extern struct pci_ops pnv_pci_ops; -- cgit v1.2.3-70-g09d2