From 5019609fce965dbdc66a7d947385fe92ca522231 Mon Sep 17 00:00:00 2001
From: Kyle Moffett
Date: Thu, 22 Dec 2011 10:19:12 +0000
Subject: powerpc/mpic: Remove MPIC_BROKEN_FRR_NIRQS and duplicate irq_count

The mpic->irq_count variable is only used as a software error-checking limit
to determine whether or not an IRQ number is valid.  In board code which does
not manually specify an IRQ count to mpic_alloc(), i.e. 0, it is automatically
detected from the number of ISUs and the ISU size.

In practice, all hardware ends up with irq_count == num_sources, so all of the
runtime checks on mpic->irq_count should just check the value of
mpic->num_sources instead.

When platform hardware does not correctly report the number of IRQs, which
only happens on the MPC85xx/MPC86xx, the MPIC_BROKEN_FRR_NIRQS flag is used to
override the detected value of num_sources with the manual irq_count
parameter.  Since there's no need to manually specify the number of IRQs
except in this case, the extra flag can be eliminated and the test changed to
"irq_count != 0".

Signed-off-by: Kyle Moffett
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/include/asm/mpic.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/powerpc/include')

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 67b4d983723..2ebac31d1bb 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -273,7 +273,6 @@ struct mpic
 	unsigned int		isu_size;
 	unsigned int		isu_shift;
 	unsigned int		isu_mask;
-	unsigned int		irq_count;
 	/* Number of sources */
 	unsigned int		num_sources;
 	/* default senses array */
@@ -363,8 +362,6 @@ struct mpic
 #define MPIC_ENABLE_MCK			0x00000200
 /* Disable bias among target selection, spread interrupts evenly */
 #define MPIC_NO_BIAS			0x00000400
-/* Ignore NIRQS as reported by FRR */
-#define MPIC_BROKEN_FRR_NIRQS		0x00000800
 /* Destination only supports a single CPU at a time */
 #define MPIC_SINGLE_DEST_CPU		0x00001000
 /* Enable CoreInt delivery of interrupts */
--
cgit v1.2.3-70-g09d2


From e55d7f737d3daf4aaf41945c1829138c608662e9 Mon Sep 17 00:00:00 2001
From: Kyle Moffett
Date: Thu, 22 Dec 2011 10:19:14 +0000
Subject: powerpc/mpic: Remove duplicate MPIC_WANTS_RESET flag

There are two separate flags controlling whether or not the MPIC is reset
during initialization, which is completely unnecessary, and only one of them
can be specified in the device tree.

Also, most platforms in-tree right now do actually want to reset the MPIC
during initialization anyways, which means lots of duplicate code passing the
MPIC_WANTS_RESET flag.

Fix all of the callers which currently do not pass the MPIC_WANTS_RESET flag
to pass the MPIC_NO_RESET flag, then remove the MPIC_WANTS_RESET flag and make
the code reset the MPIC by default.
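For context, a sketch of the caller-side effect described above (illustrative
only, not a hunk of this patch; the concrete call sites follow in the diff):
board code that previously had to request the reset explicitly now gets it by
default, while platforms whose MPIC must not be reset (for example because
firmware has already configured it) pass MPIC_NO_RESET instead:

	/* Before this patch: reset only if explicitly requested */
	mpic = mpic_alloc(NULL, 0, MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
			  0, 256, " OpenPIC ");

	/* After this patch: the reset happens by default */
	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");

	/* After this patch: opting out of the reset */
	mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC ");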
Signed-off-by: Kyle Moffett Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mpic.h | 6 +----- arch/powerpc/platforms/44x/currituck.c | 2 +- arch/powerpc/platforms/44x/iss4xx.c | 3 +-- arch/powerpc/platforms/85xx/corenet_ds.c | 3 ++- arch/powerpc/platforms/85xx/ksi8560.c | 3 +-- arch/powerpc/platforms/85xx/mpc8536_ds.c | 4 +--- arch/powerpc/platforms/85xx/mpc85xx_ads.c | 3 +-- arch/powerpc/platforms/85xx/mpc85xx_cds.c | 3 +-- arch/powerpc/platforms/85xx/mpc85xx_ds.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 3 +-- arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 3 +-- arch/powerpc/platforms/85xx/p1010rdb.c | 3 +-- arch/powerpc/platforms/85xx/p1022_ds.c | 4 +--- arch/powerpc/platforms/85xx/p1023_rds.c | 3 +-- arch/powerpc/platforms/85xx/sbc8548.c | 3 +-- arch/powerpc/platforms/85xx/sbc8560.c | 3 +-- arch/powerpc/platforms/85xx/socrates.c | 3 +-- arch/powerpc/platforms/85xx/stx_gp3.c | 3 +-- arch/powerpc/platforms/85xx/tqm85xx.c | 2 +- arch/powerpc/platforms/85xx/xes_mpc85xx.c | 4 +--- arch/powerpc/platforms/86xx/pic.c | 3 +-- arch/powerpc/platforms/cell/setup.c | 3 ++- arch/powerpc/platforms/chrp/setup.c | 3 ++- arch/powerpc/platforms/embedded6xx/holly.c | 3 +-- arch/powerpc/platforms/embedded6xx/linkstation.c | 3 +-- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 3 +-- arch/powerpc/platforms/embedded6xx/storcenter.c | 3 +-- arch/powerpc/platforms/maple/setup.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/powerpc/platforms/powermac/pic.c | 1 - arch/powerpc/platforms/pseries/setup.c | 3 ++- arch/powerpc/sysdev/mpic.c | 2 +- 32 files changed, 35 insertions(+), 59 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 2ebac31d1bb..d7e3fec343d 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -348,8 +348,6 @@ struct mpic #define MPIC_U3_HT_IRQS 0x00000004 /* Broken IPI registers (autodetected) */ #define MPIC_BROKEN_IPI 0x00000008 -/* MPIC wants a reset */ -#define MPIC_WANTS_RESET 0x00000010 /* Spurious vector requires EOI */ #define MPIC_SPV_EOI 0x00000020 /* No passthrough disable */ @@ -366,9 +364,7 @@ struct mpic #define MPIC_SINGLE_DEST_CPU 0x00001000 /* Enable CoreInt delivery of interrupts */ #define MPIC_ENABLE_COREINT 0x00002000 -/* Disable resetting of the MPIC. - * NOTE: This flag trumps MPIC_WANTS_RESET. 
- */ +/* Do not reset the MPIC during initialization */ #define MPIC_NO_RESET 0x00004000 /* Freescale MPIC (compatible includes "fsl,mpic") */ #define MPIC_FSL 0x00008000 diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index 3f6229b5dee..583e67fee37 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c @@ -83,7 +83,7 @@ static void __init ppc47x_init_irq(void) * device-tree, just pass 0 to all arguments */ struct mpic *mpic = - mpic_alloc(np, 0, 0, 0, 0, " MPIC "); + mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); ppc_md.get_irq = mpic_get_irq; diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index 5b8cdbb82f8..a28a8629727 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -71,8 +71,7 @@ static void __init iss4xx_init_irq(void) /* The MPIC driver will get everything it needs from the * device-tree, just pass 0 to all arguments */ - struct mpic *mpic = mpic_alloc(np, 0, 0, 0, 0, - " MPIC "); + struct mpic *mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); ppc_md.get_irq = mpic_get_irq; diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c index 768479b986d..df69e99e511 100644 --- a/arch/powerpc/platforms/85xx/corenet_ds.c +++ b/arch/powerpc/platforms/85xx/corenet_ds.c @@ -36,7 +36,8 @@ void __init corenet_ds_pic_init(void) { struct mpic *mpic; - unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU; + unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | + MPIC_NO_RESET; if (ppc_md.get_irq == mpic_get_coreint_irq) flags |= MPIC_ENABLE_COREINT; diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index 20f75d7819c..60120e55da4 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -57,8 +57,7 @@ static void machine_restart(char *cmd) static void __init ksi8560_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index d5373e0c0f7..f58872688d8 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -36,9 +36,7 @@ void __init mpc8536_ds_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | - MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index 3bebb5173bf..d19f675cb36 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -50,8 +50,7 @@ static int mpc85xx_exclude_device(struct pci_controller *hose, static void __init mpc85xx_ads_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 40f03da616a..02d97e31c18 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ 
b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -188,8 +188,7 @@ static struct irqaction mpc85xxcds_8259_irqaction = { static void __init mpc85xx_cds_pic_init(void) { struct mpic *mpic; - mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 528b9a09e14..6e23e3e34bd 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -72,12 +72,12 @@ void __init mpc85xx_ds_pic_init(void) if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) { mpic = mpic_alloc(NULL, 0, + MPIC_NO_RESET | MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); } else { mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index a0cb7707c5b..57aceb5d273 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -434,8 +434,7 @@ machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier); static void __init mpc85xx_mds_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index a23f6f30993..407c7391e36 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -48,13 +48,12 @@ void __init mpc85xx_rdb_pic_init(void) unsigned long root = of_get_flat_dt_root(); if (of_flat_dt_is_compatible(root, "fsl,MPC85XXRDB-CAMP")) { - mpic = mpic_alloc(NULL, 0, + mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET | MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); } else { mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 8650773c839..d8bd6563d9c 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -32,8 +32,7 @@ void __init p1010_rdb_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 28266ab619d..63ba7905396 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -242,9 +242,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) void __init p1022_ds_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | - MPIC_BIG_ENDIAN | + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c index b1cd6a28901..6b07398e436 100644 --- a/arch/powerpc/platforms/85xx/p1023_rds.c +++ b/arch/powerpc/platforms/85xx/p1023_rds.c @@ -93,8 +93,7 @@ machine_device_initcall(p1023_rds, mpc85xx_common_publish_devices); static void __init mpc85xx_rds_pic_init(void) { - struct mpic *mpic = 
mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c index 184a5078461..1677b8a2267 100644 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -54,8 +54,7 @@ static int sbc_rev; static void __init sbc8548_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c index 940752e9305..3c3bbcc2756 100644 --- a/arch/powerpc/platforms/85xx/sbc8560.c +++ b/arch/powerpc/platforms/85xx/sbc8560.c @@ -41,8 +41,7 @@ static void __init sbc8560_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c index 18f635906b2..b7191921775 100644 --- a/arch/powerpc/platforms/85xx/socrates.c +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -48,8 +48,7 @@ static void __init socrates_pic_init(void) { struct device_node *np; - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c index e9e5234b4e7..27ca3a7b04a 100644 --- a/arch/powerpc/platforms/85xx/stx_gp3.c +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -48,8 +48,7 @@ static void __init stx_gp3_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c index bf7c89fb75b..d7504cefe01 100644 --- a/arch/powerpc/platforms/85xx/tqm85xx.c +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -47,7 +47,7 @@ static void __init tqm85xx_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 1ab4e026006..503c21596c6 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -43,9 +43,7 @@ void __init xes_mpc85xx_pic_init(void) { - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | - MPIC_BIG_ENDIAN, + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); mpic_init(mpic); diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index 27959d63a07..22cc3571ae1 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -37,8 +37,7 @@ void __init mpc86xx_init_irq(void) int cascade_irq; #endif - struct mpic *mpic = mpic_alloc(NULL, 0, - MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " MPIC "); BUG_ON(mpic == NULL); diff --git 
a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 62002a7edfe..fa3e294fd34 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -197,7 +197,8 @@ static void __init mpic_init_IRQ(void) /* The MPIC driver will get everything it needs from the * device-tree, just pass 0 to all arguments */ - mpic = mpic_alloc(dn, 0, MPIC_SECONDARY, 0, 0, " MPIC "); + mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET, + 0, 0, " MPIC "); if (mpic == NULL) continue; mpic_init(mpic); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index f1f17bb2c33..c665d7de6c9 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -435,7 +435,8 @@ static void __init chrp_find_openpic(void) if (len > 1) isu_size = iranges[3]; - chrp_mpic = mpic_alloc(np, opaddr, 0, isu_size, 0, " MPIC "); + chrp_mpic = mpic_alloc(np, opaddr, MPIC_NO_RESET, + isu_size, 0, " MPIC "); if (chrp_mpic == NULL) { printk(KERN_ERR "Failed to allocate MPIC structure\n"); goto bail; diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index d7727de85d5..ab51b21b4bd 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -154,8 +154,7 @@ static void __init holly_init_IRQ(void) struct device_node *cascade_node = NULL; #endif - mpic = mpic_alloc(NULL, 0, - MPIC_BIG_ENDIAN | MPIC_WANTS_RESET | + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108, 24, 0, "Tsi108_PIC"); diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 9479f4ebe1b..455e7c08742 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -82,8 +82,7 @@ static void __init linkstation_init_IRQ(void) { struct mpic *mpic; - mpic = mpic_alloc(NULL, 0, MPIC_WANTS_RESET, - 4, 0, " EPIC "); + mpic = mpic_alloc(NULL, 0, 0, 4, 0, " EPIC "); BUG_ON(mpic == NULL); /* PCI IRQs */ diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 90acb544916..74ccce36bae 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -108,8 +108,7 @@ static void __init mpc7448_hpc2_init_IRQ(void) struct device_node *cascade_node = NULL; #endif - mpic = mpic_alloc(NULL, 0, - MPIC_BIG_ENDIAN | MPIC_WANTS_RESET | + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108, 24, 0, "Tsi108_PIC"); diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index 451dce4edcc..e0ed3c71d69 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -84,8 +84,7 @@ static void __init storcenter_init_IRQ(void) { struct mpic *mpic; - mpic = mpic_alloc(NULL, 0, MPIC_WANTS_RESET, - 16, 0, " OpenPIC "); + mpic = mpic_alloc(NULL, 0, 0, 16, 0, " OpenPIC "); BUG_ON(mpic == NULL); /* diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 0bcbfe7b2c5..3b7545a51aa 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -262,7 +262,7 @@ static void __init maple_init_IRQ(void) flags |= MPIC_BIG_ENDIAN; /* XXX Maple specific bits */ - flags |= MPIC_U3_HT_IRQS | 
MPIC_WANTS_RESET; + flags |= MPIC_U3_HT_IRQS; /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ flags |= MPIC_BIG_ENDIAN; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 98b7a7c1317..e777ad471a4 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -224,7 +224,7 @@ static __init void pas_init_IRQ(void) openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); - mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS; + mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET; nmiprop = of_get_property(mpic_node, "nmi-source", NULL); if (nmiprop) diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 7761aabfc29..0293d7a258c 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -469,7 +469,6 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); - flags |= MPIC_WANTS_RESET; if (of_get_property(np, "big-endian", NULL)) flags |= MPIC_BIG_ENDIAN; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6d0a5dfbb9f..d928412cfb3 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -190,7 +190,8 @@ static void __init pseries_mpic_init_IRQ(void) BUG_ON(openpic_addr == 0); /* Setup the openpic driver */ - mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, 0, 16, 0, " MPIC "); + mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, + MPIC_NO_RESET, 16, 0, " MPIC "); BUG_ON(mpic == NULL); /* Add ISUs */ diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 90171d447c3..b9b989d305d 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1286,7 +1286,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* When using a device-node, reset requests are only honored if the MPIC * is allowed to reset. */ - if ((mpic->flags & MPIC_WANTS_RESET) && !(mpic->flags & MPIC_NO_RESET)) { + if (!(mpic->flags & MPIC_NO_RESET)) { printk(KERN_DEBUG "mpic: Resetting\n"); mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0), mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0)) -- cgit v1.2.3-70-g09d2 From eb39c8803d0e3d98fe74825f99287f63d55e6460 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 16 Feb 2012 01:14:22 +0000 Subject: fadump: Reserve the memory for firmware assisted dump. Reserve the memory during early boot to preserve CPU state data, HPTE region and RMA (real mode area) region data in case of kernel crash. At the time of crash, powerpc firmware will store CPU state data, HPTE region data and move RMA region data to the reserved memory area. If the firmware-assisted dump fails to reserve the memory, then fallback to existing kexec-based kdump. Most of the code implementation to reserve memory has been adapted from phyp assisted dump implementation written by Linas Vepstas and Manish Ahuja This patch also introduces a config option CONFIG_FA_DUMP for firmware assisted dump feature on Powerpc (ppc64) architecture. 
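As a usage sketch (not part of the patch itself; inferred from the Kconfig
text and the early_param() handlers added below): with CONFIG_FA_DUMP built
in, the feature is driven from the kernel command line, and the default
reservation size (5% of system RAM rounded down to a 256MB multiple, but never
less than the MIN_BOOT_MEM floor) can be overridden explicitly:

	fadump=on
	fadump=on fadump_reserve_mem=1G    (memparse accepts K/M/G suffixes)

If the RTAS "ibm,configure-kernel-dump" token is absent or the reservation is
not made, fadump_reserve_mem() returns 0 and boot falls back to the existing
kexec-based kdump path via reserve_crashkernel(), as the prom.c hunk below
shows.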
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 13 ++ arch/powerpc/include/asm/fadump.h | 71 +++++++++++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/fadump.c | 246 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/prom.c | 15 ++- 5 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/fadump.h create mode 100644 arch/powerpc/kernel/fadump.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1919634a9b3..afa4dabfad7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -386,6 +386,19 @@ config PHYP_DUMP If unsure, say "N" +config FA_DUMP + bool "Firmware-assisted dump" + depends on PPC64 && PPC_RTAS && CRASH_DUMP + help + A robust mechanism to get reliable kernel crash dump with + assistance from firmware. This approach does not use kexec, + instead firmware assists in booting the kdump kernel + while preserving memory contents. Firmware-assisted dump + is meant to be a kdump replacement offering robustness and + speed not possible without system firmware assistance. + + If unsure, say "N" + config IRQ_ALL_CPUS bool "Distribute interrupts on all CPUs by default" depends on SMP && !MV64360 diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h new file mode 100644 index 00000000000..7be25d30d98 --- /dev/null +++ b/arch/powerpc/include/asm/fadump.h @@ -0,0 +1,71 @@ +/* + * Firmware Assisted dump header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2011 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#ifndef __PPC64_FA_DUMP_H__ +#define __PPC64_FA_DUMP_H__ + +#ifdef CONFIG_FA_DUMP + +/* + * The RMA region will be saved for later dumping when kernel crashes. + * RMA is Real Mode Area, the first block of logical memory address owned + * by logical partition, containing the storage that may be accessed with + * translate off. + */ +#define RMA_START 0x0 +#define RMA_END (ppc64_rma_size) + +/* + * On some Power systems where RMO is 128MB, it still requires minimum of + * 256MB for kernel to boot successfully. When kdump infrastructure is + * configured to save vmcore over network, we run into OOM issue while + * loading modules related to network setup. Hence we need aditional 64M + * of memory to avoid OOM issue. + */ +#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? 
(0x1UL << 28) : RMA_END) \ + + (0x1UL << 26)) + +/* Firmware provided dump sections */ +#define FADUMP_CPU_STATE_DATA 0x0001 +#define FADUMP_HPTE_REGION 0x0002 +#define FADUMP_REAL_MODE_REGION 0x0011 + +struct fw_dump { + unsigned long cpu_state_data_size; + unsigned long hpte_region_size; + unsigned long boot_memory_size; + unsigned long reserve_dump_area_start; + unsigned long reserve_dump_area_size; + /* cmd line option during boot */ + unsigned long reserve_bootvar; + + int ibm_configure_kernel_dump; + + unsigned long fadump_enabled:1; + unsigned long fadump_supported:1; + unsigned long dump_active:1; +}; + +extern int early_init_dt_scan_fw_dump(unsigned long node, + const char *uname, int depth, void *data); +extern int fadump_reserve_mem(void); +#endif +#endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ee728e433aa..391bf7e1ba2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_FA_DUMP) += fadump.o ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_E500) += idle_e500.o endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c new file mode 100644 index 00000000000..deb276a9ce7 --- /dev/null +++ b/arch/powerpc/kernel/fadump.c @@ -0,0 +1,246 @@ +/* + * Firmware Assisted dump: A robust mechanism to get reliable kernel crash + * dump with assistance from firmware. This approach does not use kexec, + * instead firmware assists in booting the kdump kernel while preserving + * memory contents. The most of the code implementation has been adapted + * from phyp assisted dump implementation written by Linas Vepstas and + * Manish Ahuja + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2011 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG +#define pr_fmt(fmt) "fadump: " fmt + +#include +#include + +#include +#include +#include +#include + +static struct fw_dump fw_dump; + +/* Scan the Firmware Assisted dump configuration details. */ +int __init early_init_dt_scan_fw_dump(unsigned long node, + const char *uname, int depth, void *data) +{ + __be32 *sections; + int i, num_sections; + unsigned long size; + const int *token; + + if (depth != 1 || strcmp(uname, "rtas") != 0) + return 0; + + /* + * Check if Firmware Assisted dump is supported. if yes, check + * if dump has been initiated on last reboot. + */ + token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); + if (!token) + return 0; + + fw_dump.fadump_supported = 1; + fw_dump.ibm_configure_kernel_dump = *token; + + /* + * The 'ibm,kernel-dump' rtas node is present only if there is + * dump data waiting for us. 
+ */ + if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL)) + fw_dump.dump_active = 1; + + /* Get the sizes required to store dump data for the firmware provided + * dump sections. + * For each dump section type supported, a 32bit cell which defines + * the ID of a supported section followed by two 32 bit cells which + * gives teh size of the section in bytes. + */ + sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", + &size); + + if (!sections) + return 0; + + num_sections = size / (3 * sizeof(u32)); + + for (i = 0; i < num_sections; i++, sections += 3) { + u32 type = (u32)of_read_number(sections, 1); + + switch (type) { + case FADUMP_CPU_STATE_DATA: + fw_dump.cpu_state_data_size = + of_read_ulong(§ions[1], 2); + break; + case FADUMP_HPTE_REGION: + fw_dump.hpte_region_size = + of_read_ulong(§ions[1], 2); + break; + } + } + return 1; +} + +/** + * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM + * + * Function to find the largest memory size we need to reserve during early + * boot process. This will be the size of the memory that is required for a + * kernel to boot successfully. + * + * This function has been taken from phyp-assisted dump feature implementation. + * + * returns larger of 256MB or 5% rounded down to multiples of 256MB. + * + * TODO: Come up with better approach to find out more accurate memory size + * that is required for a kernel to boot successfully. + * + */ +static inline unsigned long fadump_calculate_reserve_size(void) +{ + unsigned long size; + + /* + * Check if the size is specified through fadump_reserve_mem= cmdline + * option. If yes, then use that. + */ + if (fw_dump.reserve_bootvar) + return fw_dump.reserve_bootvar; + + /* divide by 20 to get 5% of value */ + size = memblock_end_of_DRAM() / 20; + + /* round it down in multiples of 256 */ + size = size & ~0x0FFFFFFFUL; + + /* Truncate to memory_limit. We don't want to over reserve the memory.*/ + if (memory_limit && size > memory_limit) + size = memory_limit; + + return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); +} + +/* + * Calculate the total memory size required to be reserved for + * firmware-assisted dump registration. + */ +static unsigned long get_fadump_area_size(void) +{ + unsigned long size = 0; + + size += fw_dump.cpu_state_data_size; + size += fw_dump.hpte_region_size; + size += fw_dump.boot_memory_size; + + size = PAGE_ALIGN(size); + return size; +} + +int __init fadump_reserve_mem(void) +{ + unsigned long base, size, memory_boundary; + + if (!fw_dump.fadump_enabled) + return 0; + + if (!fw_dump.fadump_supported) { + printk(KERN_INFO "Firmware-assisted dump is not supported on" + " this hardware\n"); + fw_dump.fadump_enabled = 0; + return 0; + } + /* Initialize boot memory size */ + fw_dump.boot_memory_size = fadump_calculate_reserve_size(); + + /* + * Calculate the memory boundary. + * If memory_limit is less than actual memory boundary then reserve + * the memory for fadump beyond the memory_limit and adjust the + * memory_limit accordingly, so that the running kernel can run with + * specified memory_limit. 
+ */ + if (memory_limit && memory_limit < memblock_end_of_DRAM()) { + size = get_fadump_area_size(); + if ((memory_limit + size) < memblock_end_of_DRAM()) + memory_limit += size; + else + memory_limit = memblock_end_of_DRAM(); + printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" + " dump, now %#016llx\n", + (unsigned long long)memory_limit); + } + if (memory_limit) + memory_boundary = memory_limit; + else + memory_boundary = memblock_end_of_DRAM(); + + if (fw_dump.dump_active) { + printk(KERN_INFO "Firmware-assisted dump is active.\n"); + /* + * If last boot has crashed then reserve all the memory + * above boot_memory_size so that we don't touch it until + * dump is written to disk by userspace tool. This memory + * will be released for general use once the dump is saved. + */ + base = fw_dump.boot_memory_size; + size = memory_boundary - base; + memblock_reserve(base, size); + printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " + "for saving crash dump\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20)); + } else { + /* Reserve the memory at the top of memory. */ + size = get_fadump_area_size(); + base = memory_boundary - size; + memblock_reserve(base, size); + printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " + "for firmware-assisted dump\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20)); + } + fw_dump.reserve_dump_area_start = base; + fw_dump.reserve_dump_area_size = size; + return 1; +} + +/* Look for fadump= cmdline option. */ +static int __init early_fadump_param(char *p) +{ + if (!p) + return 1; + + if (strncmp(p, "on", 2) == 0) + fw_dump.fadump_enabled = 1; + else if (strncmp(p, "off", 3) == 0) + fw_dump.fadump_enabled = 0; + + return 0; +} +early_param("fadump", early_fadump_param); + +/* Look for fadump_reserve_mem= cmdline option */ +static int __init early_fadump_reserve_mem(char *p) +{ + if (p) + fw_dump.reserve_bootvar = memparse(p, &p); + return 0; +} +early_param("fadump_reserve_mem", early_fadump_reserve_mem); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index abe405dab34..70222b35cfc 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -719,6 +720,11 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); #endif +#ifdef CONFIG_FA_DUMP + /* scan tree to see if dump is active during last boot */ + of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); +#endif + /* Pre-initialize the cmd_line with the content of boot_commmand_line, * which will be empty except when the content of the variable has * been overriden by a bootloading mechanism. This happens typically @@ -750,7 +756,14 @@ void __init early_init_devtree(void *params) if (PHYSICAL_START > MEMORY_START) memblock_reserve(MEMORY_START, 0x8000); reserve_kdump_trampoline(); - reserve_crashkernel(); +#ifdef CONFIG_FA_DUMP + /* + * If we fail to reserve memory for firmware-assisted dump then + * fallback to kexec based kdump. + */ + if (fadump_reserve_mem() == 0) +#endif + reserve_crashkernel(); early_reserve_mem(); phyp_dump_reserve_mem(); -- cgit v1.2.3-70-g09d2 From 3ccc00a7e04ff7718c9aebb4b0c982571c798759 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 20 Feb 2012 02:15:03 +0000 Subject: fadump: Register for firmware assisted dump. 
On 2012-02-20 11:02:51 Mon, Paul Mackerras wrote: > On Thu, Feb 16, 2012 at 04:44:30PM +0530, Mahesh J Salgaonkar wrote: > > If I have read the code correctly, we are going to get this printk on > non-pSeries machines or on older pSeries machines, even if the user > has not put the fadump=on option on the kernel command line. The > printk will be annoying since there is no actual error condition. It > seems to me that the condition for the printk should include > fw_dump.fadump_enabled. In other words you should probably add > > if (!fw_dump.fadump_enabled) > return 0; > > at the beginning of the function. Hi Paul, Thanks for pointing it out. Please find the updated patch below. The existing patches above this (4/10 through 10/10) cleanly applies on this update. Thanks, -Mahesh. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fadump.h | 57 ++++++ arch/powerpc/kernel/fadump.c | 355 +++++++++++++++++++++++++++++++++++++- arch/powerpc/kernel/iommu.c | 8 +- arch/powerpc/mm/hash_utils_64.c | 11 ++ 4 files changed, 427 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 7be25d30d98..bbaf278bdcc 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -48,6 +48,58 @@ #define FADUMP_HPTE_REGION 0x0002 #define FADUMP_REAL_MODE_REGION 0x0011 +/* Dump request flag */ +#define FADUMP_REQUEST_FLAG 0x00000001 + +/* FAD commands */ +#define FADUMP_REGISTER 1 +#define FADUMP_UNREGISTER 2 +#define FADUMP_INVALIDATE 3 + +/* Kernel Dump section info */ +struct fadump_section { + u32 request_flag; + u16 source_data_type; + u16 error_flags; + u64 source_address; + u64 source_len; + u64 bytes_dumped; + u64 destination_address; +}; + +/* ibm,configure-kernel-dump header. */ +struct fadump_section_header { + u32 dump_format_version; + u16 dump_num_sections; + u16 dump_status_flag; + u32 offset_first_dump_section; + + /* Fields for disk dump option. */ + u32 dd_block_size; + u64 dd_block_offset; + u64 dd_num_blocks; + u32 dd_offset_disk_path; + + /* Maximum time allowed to prevent an automatic dump-reboot. */ + u32 max_time_auto; +}; + +/* + * Firmware Assisted dump memory structure. This structure is required for + * registering future kernel dump with power firmware through rtas call. + * + * No disk dump option. Hence disk dump path string section is not included. + */ +struct fadump_mem_struct { + struct fadump_section_header header; + + /* Kernel dump sections */ + struct fadump_section cpu_state_data; + struct fadump_section hpte_region; + struct fadump_section rmr_region; +}; + +/* Firmware-assisted dump configuration details. 
*/ struct fw_dump { unsigned long cpu_state_data_size; unsigned long hpte_region_size; @@ -62,10 +114,15 @@ struct fw_dump { unsigned long fadump_enabled:1; unsigned long fadump_supported:1; unsigned long dump_active:1; + unsigned long dump_registered:1; }; extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); +extern int setup_fadump(void); +extern int is_fadump_active(void); +#else /* CONFIG_FA_DUMP */ +static inline int is_fadump_active(void) { return 0; } #endif #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index deb276a9ce7..eb8f782afad 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -29,6 +29,9 @@ #include #include +#include +#include +#include #include #include @@ -36,6 +39,10 @@ #include static struct fw_dump fw_dump; +static struct fadump_mem_struct fdm; +static const struct fadump_mem_struct *fdm_active; + +static DEFINE_MUTEX(fadump_mutex); /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -64,7 +71,8 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, * The 'ibm,kernel-dump' rtas node is present only if there is * dump data waiting for us. */ - if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL)) + fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); + if (fdm_active) fw_dump.dump_active = 1; /* Get the sizes required to store dump data for the firmware provided @@ -98,6 +106,85 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, return 1; } +int is_fadump_active(void) +{ + return fw_dump.dump_active; +} + +/* Print firmware assisted dump configurations for debugging purpose. */ +static void fadump_show_config(void) +{ + pr_debug("Support for firmware-assisted dump (fadump): %s\n", + (fw_dump.fadump_supported ? "present" : "no support")); + + if (!fw_dump.fadump_supported) + return; + + pr_debug("Fadump enabled : %s\n", + (fw_dump.fadump_enabled ? "yes" : "no")); + pr_debug("Dump Active : %s\n", + (fw_dump.dump_active ? "yes" : "no")); + pr_debug("Dump section sizes:\n"); + pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); + pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); + pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); +} + +static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, + unsigned long addr) +{ + if (!fdm) + return 0; + + memset(fdm, 0, sizeof(struct fadump_mem_struct)); + addr = addr & PAGE_MASK; + + fdm->header.dump_format_version = 0x00000001; + fdm->header.dump_num_sections = 3; + fdm->header.dump_status_flag = 0; + fdm->header.offset_first_dump_section = + (u32)offsetof(struct fadump_mem_struct, cpu_state_data); + + /* + * Fields for disk dump option. + * We are not using disk dump option, hence set these fields to 0. + */ + fdm->header.dd_block_size = 0; + fdm->header.dd_block_offset = 0; + fdm->header.dd_num_blocks = 0; + fdm->header.dd_offset_disk_path = 0; + + /* set 0 to disable an automatic dump-reboot. */ + fdm->header.max_time_auto = 0; + + /* Kernel dump sections */ + /* cpu state data section. 
*/ + fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG; + fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA; + fdm->cpu_state_data.source_address = 0; + fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size; + fdm->cpu_state_data.destination_address = addr; + addr += fw_dump.cpu_state_data_size; + + /* hpte region section */ + fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG; + fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION; + fdm->hpte_region.source_address = 0; + fdm->hpte_region.source_len = fw_dump.hpte_region_size; + fdm->hpte_region.destination_address = addr; + addr += fw_dump.hpte_region_size; + + /* RMA region section */ + fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG; + fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION; + fdm->rmr_region.source_address = RMA_START; + fdm->rmr_region.source_len = fw_dump.boot_memory_size; + fdm->rmr_region.destination_address = addr; + addr += fw_dump.boot_memory_size; + + return addr; +} + /** * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM * @@ -166,8 +253,15 @@ int __init fadump_reserve_mem(void) fw_dump.fadump_enabled = 0; return 0; } - /* Initialize boot memory size */ - fw_dump.boot_memory_size = fadump_calculate_reserve_size(); + /* + * Initialize boot memory size + * If dump is active then we have already calculated the size during + * first kernel. + */ + if (fdm_active) + fw_dump.boot_memory_size = fdm_active->rmr_region.source_len; + else + fw_dump.boot_memory_size = fadump_calculate_reserve_size(); /* * Calculate the memory boundary. @@ -244,3 +338,258 @@ static int __init early_fadump_reserve_mem(char *p) return 0; } early_param("fadump_reserve_mem", early_fadump_reserve_mem); + +static void register_fw_dump(struct fadump_mem_struct *fdm) +{ + int rc; + unsigned int wait_time; + + pr_debug("Registering for firmware-assisted kernel dump...\n"); + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, + FADUMP_REGISTER, fdm, + sizeof(struct fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + + } while (wait_time); + + switch (rc) { + case -1: + printk(KERN_ERR "Failed to register firmware-assisted kernel" + " dump. Hardware Error(%d).\n", rc); + break; + case -3: + printk(KERN_ERR "Failed to register firmware-assisted kernel" + " dump. Parameter Error(%d).\n", rc); + break; + case -9: + printk(KERN_ERR "firmware-assisted kernel dump is already " + " registered."); + fw_dump.dump_registered = 1; + break; + case 0: + printk(KERN_INFO "firmware-assisted kernel dump registration" + " is successful\n"); + fw_dump.dump_registered = 1; + break; + } +} + +static void register_fadump(void) +{ + /* + * If no memory is reserved then we can not register for firmware- + * assisted dump. + */ + if (!fw_dump.reserve_dump_area_size) + return; + + /* register the future kernel dump with firmware. */ + register_fw_dump(&fdm); +} + +static int fadump_unregister_dump(struct fadump_mem_struct *fdm) +{ + int rc = 0; + unsigned int wait_time; + + pr_debug("Un-register firmware-assisted dump\n"); + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, + FADUMP_UNREGISTER, fdm, + sizeof(struct fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + printk(KERN_ERR "Failed to un-register firmware-assisted dump." 
+ " unexpected error(%d).\n", rc); + return rc; + } + fw_dump.dump_registered = 0; + return 0; +} + +static ssize_t fadump_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", fw_dump.fadump_enabled); +} + +static ssize_t fadump_register_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", fw_dump.dump_registered); +} + +static ssize_t fadump_register_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret = 0; + + if (!fw_dump.fadump_enabled || fdm_active) + return -EPERM; + + mutex_lock(&fadump_mutex); + + switch (buf[0]) { + case '0': + if (fw_dump.dump_registered == 0) { + ret = -EINVAL; + goto unlock_out; + } + /* Un-register Firmware-assisted dump */ + fadump_unregister_dump(&fdm); + break; + case '1': + if (fw_dump.dump_registered == 1) { + ret = -EINVAL; + goto unlock_out; + } + /* Register Firmware-assisted dump */ + register_fadump(); + break; + default: + ret = -EINVAL; + break; + } + +unlock_out: + mutex_unlock(&fadump_mutex); + return ret < 0 ? ret : count; +} + +static int fadump_region_show(struct seq_file *m, void *private) +{ + const struct fadump_mem_struct *fdm_ptr; + + if (!fw_dump.fadump_enabled) + return 0; + + if (fdm_active) + fdm_ptr = fdm_active; + else + fdm_ptr = &fdm; + + seq_printf(m, + "CPU : [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->cpu_state_data.destination_address, + fdm_ptr->cpu_state_data.destination_address + + fdm_ptr->cpu_state_data.source_len - 1, + fdm_ptr->cpu_state_data.source_len, + fdm_ptr->cpu_state_data.bytes_dumped); + seq_printf(m, + "HPTE: [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->hpte_region.destination_address, + fdm_ptr->hpte_region.destination_address + + fdm_ptr->hpte_region.source_len - 1, + fdm_ptr->hpte_region.source_len, + fdm_ptr->hpte_region.bytes_dumped); + seq_printf(m, + "DUMP: [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + fdm_ptr->rmr_region.destination_address, + fdm_ptr->rmr_region.destination_address + + fdm_ptr->rmr_region.source_len - 1, + fdm_ptr->rmr_region.source_len, + fdm_ptr->rmr_region.bytes_dumped); + + if (!fdm_active || + (fw_dump.reserve_dump_area_start == + fdm_ptr->cpu_state_data.destination_address)) + return 0; + + /* Dump is active. Show reserved memory region. 
*/ + seq_printf(m, + " : [%#016llx-%#016llx] %#llx bytes, " + "Dumped: %#llx\n", + (unsigned long long)fw_dump.reserve_dump_area_start, + fdm_ptr->cpu_state_data.destination_address - 1, + fdm_ptr->cpu_state_data.destination_address - + fw_dump.reserve_dump_area_start, + fdm_ptr->cpu_state_data.destination_address - + fw_dump.reserve_dump_area_start); + return 0; +} + +static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, + 0444, fadump_enabled_show, + NULL); +static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, + 0644, fadump_register_show, + fadump_register_store); + +static int fadump_region_open(struct inode *inode, struct file *file) +{ + return single_open(file, fadump_region_show, inode->i_private); +} + +static const struct file_operations fadump_region_fops = { + .open = fadump_region_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void fadump_init_files(void) +{ + struct dentry *debugfs_file; + int rc = 0; + + rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_enabled (%d)\n", rc); + + rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_registered (%d)\n", rc); + + debugfs_file = debugfs_create_file("fadump_region", 0444, + powerpc_debugfs_root, NULL, + &fadump_region_fops); + if (!debugfs_file) + printk(KERN_ERR "fadump: unable to create debugfs file" + " fadump_region\n"); + return; +} + +/* + * Prepare for firmware-assisted dump. + */ +int __init setup_fadump(void) +{ + if (!fw_dump.fadump_enabled) + return 0; + + if (!fw_dump.fadump_supported) { + printk(KERN_ERR "Firmware-assisted dump is not supported on" + " this hardware\n"); + return 0; + } + + fadump_show_config(); + /* Initialize the kernel dump memory structure for FAD registration. */ + if (fw_dump.reserve_dump_area_size) + init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); + fadump_init_files(); + + return 1; +} +subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0cfcf98aafc..359f078571c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #define DBG(...) @@ -445,7 +446,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, static void iommu_table_clear(struct iommu_table *tbl) { - if (!is_kdump_kernel()) { + /* + * In case of firmware assisted dump system goes through clean + * reboot process at the time of system crash. Hence it's safe to + * clear the TCE entries if firmware assisted dump is active. + */ + if (!is_kdump_kernel() || is_fadump_active()) { /* Clear the table in case firmware left allocations in it */ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); return; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 2d282186cb4..b534bbac3f8 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -55,6 +55,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -625,6 +626,16 @@ static void __init htab_initialize(void) /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; +#ifdef CONFIG_FA_DUMP + /* + * If firmware assisted dump is active firmware preserves + * the contents of htab along with entire partition memory. 
+ * Clear the htab if firmware assisted dump is active so + * that we dont end up using old mappings. + */ + if (is_fadump_active() && ppc_md.hpte_clear_all) + ppc_md.hpte_clear_all(); +#endif } else { /* Find storage for the HPT. Must be contiguous in * the absolute address space. On cell we want it to be -- cgit v1.2.3-70-g09d2 From 2df173d9e85d9e2c6a8933c63f0c034accff7e0f Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 16 Feb 2012 01:14:37 +0000 Subject: fadump: Initialize elfcore header and add PT_LOAD program headers. Build the crash memory range list by traversing through system memory during the first kernel before we register for firmware-assisted dump. After the successful dump registration, initialize the elfcore header and populate PT_LOAD program headers with crash memory ranges. The elfcore header is saved in the scratch area within the reserved memory. The scratch area starts at the end of the memory reserved for saving RMR region contents. The scratch area contains fadump crash info structure that contains magic number for fadump validation and physical address where the eflcore header can be found. This structure will also be used to pass some important crash info data to the second kernel which will help second kernel to populate ELF core header with correct data before it gets exported through /proc/vmcore. Since the firmware preserves the entire partition memory at the time of crash the contents of the scratch area will be preserved till second kernel boot. Since the memory dump exported through /proc/vmcore is in ELF format similar to kdump, it will help us to reuse the kdump infrastructure for dump capture and filtering. Unlike phyp dump, userspace tool does not need to refer any sysfs interface while reading /proc/vmcore. NOTE: The current design implementation does not address a possibility of introducing additional fields (in future) to this structure without affecting compatibility. It's on TODO list to come up with better approach to address this. Reserved dump area start => +-------------------------------------+ | CPU state dump data | +-------------------------------------+ | HPTE region data | +-------------------------------------+ | RMR region data | Scratch area start => +-------------------------------------+ | fadump crash info structure { | | magic nummber | +------|---- elfcorehdr_addr | | | } | +----> +-------------------------------------+ | ELF core header | Reserved dump area end => +-------------------------------------+ Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fadump.h | 43 +++++++ arch/powerpc/kernel/fadump.c | 233 +++++++++++++++++++++++++++++++++++++- 2 files changed, 275 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index bbaf278bdcc..9e172a5d53c 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -43,6 +43,12 @@ #define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? 
(0x1UL << 28) : RMA_END) \ + (0x1UL << 26)) +#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt) + +#ifndef ELF_CORE_EFLAGS +#define ELF_CORE_EFLAGS 0 +#endif + /* Firmware provided dump sections */ #define FADUMP_CPU_STATE_DATA 0x0001 #define FADUMP_HPTE_REGION 0x0002 @@ -56,6 +62,9 @@ #define FADUMP_UNREGISTER 2 #define FADUMP_INVALIDATE 3 +/* Dump status flag */ +#define FADUMP_ERROR_FLAG 0x2000 + /* Kernel Dump section info */ struct fadump_section { u32 request_flag; @@ -109,6 +118,7 @@ struct fw_dump { /* cmd line option during boot */ unsigned long reserve_bootvar; + unsigned long fadumphdr_addr; int ibm_configure_kernel_dump; unsigned long fadump_enabled:1; @@ -117,6 +127,39 @@ struct fw_dump { unsigned long dump_registered:1; }; +/* + * Copy the ascii values for first 8 characters from a string into u64 + * variable at their respective indexes. + * e.g. + * The string "FADMPINF" will be converted into 0x4641444d50494e46 + */ +static inline u64 str_to_u64(const char *str) +{ + u64 val = 0; + int i; + + for (i = 0; i < sizeof(val); i++) + val = (*str) ? (val << 8) | *str++ : val << 8; + return val; +} +#define STR_TO_HEX(x) str_to_u64(x) + +#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF") + +/* fadump crash info structure */ +struct fadump_crash_info_header { + u64 magic_number; + u64 elfcorehdr_addr; +}; + +/* Crash memory ranges */ +#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) + +struct fad_crash_memory_ranges { + unsigned long long base; + unsigned long long size; +}; + extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index eb8f782afad..63857e183de 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,8 @@ static struct fadump_mem_struct fdm; static const struct fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); +struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +int crash_mem_ranges; /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -235,6 +238,10 @@ static unsigned long get_fadump_area_size(void) size += fw_dump.cpu_state_data_size; size += fw_dump.hpte_region_size; size += fw_dump.boot_memory_size; + size += sizeof(struct fadump_crash_info_header); + size += sizeof(struct elfhdr); /* ELF core header.*/ + /* Program headers for crash memory regions. */ + size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); size = PAGE_ALIGN(size); return size; @@ -300,6 +307,12 @@ int __init fadump_reserve_mem(void) "for saving crash dump\n", (unsigned long)(size >> 20), (unsigned long)(base >> 20)); + + fw_dump.fadumphdr_addr = + fdm_active->rmr_region.destination_address + + fdm_active->rmr_region.source_len; + pr_debug("fadumphdr_addr = %p\n", + (void *) fw_dump.fadumphdr_addr); } else { /* Reserve the memory at the top of memory. */ size = get_fadump_area_size(); @@ -380,8 +393,210 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) } } +/* + * Validate and process the dump data stored by firmware before exporting + * it through '/proc/vmcore'. 
+ */ +static int __init process_fadump(const struct fadump_mem_struct *fdm_active) +{ + struct fadump_crash_info_header *fdh; + + if (!fdm_active || !fw_dump.fadumphdr_addr) + return -EINVAL; + + /* Check if the dump data is valid. */ + if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || + (fdm_active->rmr_region.error_flags != 0)) { + printk(KERN_ERR "Dump taken by platform is not valid\n"); + return -EINVAL; + } + if (fdm_active->rmr_region.bytes_dumped != + fdm_active->rmr_region.source_len) { + printk(KERN_ERR "Dump taken by platform is incomplete\n"); + return -EINVAL; + } + + /* Validate the fadump crash info header */ + fdh = __va(fw_dump.fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + printk(KERN_ERR "Crash info header is not valid.\n"); + return -EINVAL; + } + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. + */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return 0; +} + +static inline void fadump_add_crash_memory(unsigned long long base, + unsigned long long end) +{ + if (base == end) + return; + + pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", + crash_mem_ranges, base, end - 1, (end - base)); + crash_memory_ranges[crash_mem_ranges].base = base; + crash_memory_ranges[crash_mem_ranges].size = end - base; + crash_mem_ranges++; +} + +static void fadump_exclude_reserved_area(unsigned long long start, + unsigned long long end) +{ + unsigned long long ra_start, ra_end; + + ra_start = fw_dump.reserve_dump_area_start; + ra_end = ra_start + fw_dump.reserve_dump_area_size; + + if ((ra_start < end) && (ra_end > start)) { + if ((start < ra_start) && (end > ra_end)) { + fadump_add_crash_memory(start, ra_start); + fadump_add_crash_memory(ra_end, end); + } else if (start < ra_start) { + fadump_add_crash_memory(start, ra_start); + } else if (ra_end < end) { + fadump_add_crash_memory(ra_end, end); + } + } else + fadump_add_crash_memory(start, end); +} + +static int fadump_init_elfcore_header(char *bufp) +{ + struct elfhdr *elf; + + elf = (struct elfhdr *) bufp; + bufp += sizeof(struct elfhdr); + memcpy(elf->e_ident, ELFMAG, SELFMAG); + elf->e_ident[EI_CLASS] = ELF_CLASS; + elf->e_ident[EI_DATA] = ELF_DATA; + elf->e_ident[EI_VERSION] = EV_CURRENT; + elf->e_ident[EI_OSABI] = ELF_OSABI; + memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); + elf->e_type = ET_CORE; + elf->e_machine = ELF_ARCH; + elf->e_version = EV_CURRENT; + elf->e_entry = 0; + elf->e_phoff = sizeof(struct elfhdr); + elf->e_shoff = 0; + elf->e_flags = ELF_CORE_EFLAGS; + elf->e_ehsize = sizeof(struct elfhdr); + elf->e_phentsize = sizeof(struct elf_phdr); + elf->e_phnum = 0; + elf->e_shentsize = 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + + return 0; +} + +/* + * Traverse through memblock structure and setup crash memory ranges. These + * ranges will be used create PT_LOAD program headers in elfcore header. + */ +static void fadump_setup_crash_memory_ranges(void) +{ + struct memblock_region *reg; + unsigned long long start, end; + + pr_debug("Setup crash memory ranges.\n"); + crash_mem_ranges = 0; + /* + * add the first memory chunk (RMA_START through boot_memory_size) as + * a separate memory chunk. The reason is, at the time crash firmware + * will move the content of this memory chunk to different location + * specified during fadump registration. 
We need to create a separate + * program header for this chunk with the correct offset. + */ + fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + + for_each_memblock(memory, reg) { + start = (unsigned long long)reg->base; + end = start + (unsigned long long)reg->size; + if (start == RMA_START && end >= fw_dump.boot_memory_size) + start = fw_dump.boot_memory_size; + + /* add this range excluding the reserved dump area. */ + fadump_exclude_reserved_area(start, end); + } +} + +static int fadump_create_elfcore_headers(char *bufp) +{ + struct elfhdr *elf; + struct elf_phdr *phdr; + int i; + + fadump_init_elfcore_header(bufp); + elf = (struct elfhdr *)bufp; + bufp += sizeof(struct elfhdr); + + /* setup PT_LOAD sections. */ + + for (i = 0; i < crash_mem_ranges; i++) { + unsigned long long mbase, msize; + mbase = crash_memory_ranges[i].base; + msize = crash_memory_ranges[i].size; + + if (!msize) + continue; + + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_offset = mbase; + + if (mbase == RMA_START) { + /* + * The entire RMA region will be moved by firmware + * to the specified destination_address. Hence set + * the correct offset. + */ + phdr->p_offset = fdm.rmr_region.destination_address; + } + + phdr->p_paddr = mbase; + phdr->p_vaddr = (unsigned long)__va(mbase); + phdr->p_filesz = msize; + phdr->p_memsz = msize; + phdr->p_align = 0; + + /* Increment number of program headers. */ + (elf->e_phnum)++; + } + return 0; +} + +static unsigned long init_fadump_header(unsigned long addr) +{ + struct fadump_crash_info_header *fdh; + + if (!addr) + return 0; + + fw_dump.fadumphdr_addr = addr; + fdh = __va(addr); + addr += sizeof(struct fadump_crash_info_header); + + memset(fdh, 0, sizeof(struct fadump_crash_info_header)); + fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; + fdh->elfcorehdr_addr = addr; + + return addr; +} + static void register_fadump(void) { + unsigned long addr; + void *vaddr; + /* * If no memory is reserved then we can not register for firmware- * assisted dump. @@ -389,6 +604,16 @@ static void register_fadump(void) if (!fw_dump.reserve_dump_area_size) return; + fadump_setup_crash_memory_ranges(); + + addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len; + /* Initialize fadump crash info header. */ + addr = init_fadump_header(addr); + vaddr = __va(addr); + + pr_debug("Creating ELF core headers at %#016lx\n", addr); + fadump_create_elfcore_headers(vaddr); + /* register the future kernel dump with firmware. */ register_fw_dump(&fdm); } @@ -585,8 +810,14 @@ int __init setup_fadump(void) } fadump_show_config(); + /* + * If dump data is available then see if it is valid and prepare for + * saving it to the disk. + */ + if (fw_dump.dump_active) + process_fadump(fdm_active); /* Initialize the kernel dump memory structure for FAD registration. */ - if (fw_dump.reserve_dump_area_size) + else if (fw_dump.reserve_dump_area_size) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); fadump_init_files(); -- cgit v1.2.3-70-g09d2 From ebaeb5ae24379b5b635dc1d1fa6df904bc95b4d9 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 16 Feb 2012 01:14:45 +0000 Subject: fadump: Convert firmware-assisted cpu state dump data into elf notes. When registered for firmware assisted dump on powerpc, firmware preserves the registers for the active CPUs during a system crash. 
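Those saved registers ultimately have to be presented to userspace as standard ELF notes inside the core header: each note is a 12-byte header (namesz, descsz, type) followed by the 4-byte-aligned note name and descriptor, with the per-CPU register set carried as an NT_PRSTATUS descriptor. The sketch below is a minimal userspace illustration of that layout only, not the kernel code added by this patch; the helper name, buffer size and placeholder payload are invented for the example.

#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Append one ELF note: 12-byte header, then 4-byte-aligned name and desc. */
static uint32_t *append_elf_note(uint32_t *buf, const char *name,
				 uint32_t type, const void *desc, size_t desc_len)
{
	Elf64_Nhdr note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = desc_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += sizeof(note) / 4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3) / 4;		/* keep 4-byte alignment */
	memcpy(buf, desc, note.n_descsz);
	buf += (note.n_descsz + 3) / 4;
	return buf;
}

int main(void)
{
	uint32_t buf[64] = { 0 };
	uint64_t regs_placeholder[4] = { 0 };	/* stands in for struct elf_prstatus */
	uint32_t *end = append_elf_note(buf, "CORE", NT_PRSTATUS,
					regs_placeholder, sizeof(regs_placeholder));

	printf("note occupies %zu bytes\n", (size_t)(end - buf) * sizeof(buf[0]));
	return 0;
}

The in-kernel helpers added below (fadump_append_elf_note() and fadump_regs_to_elf_notes()) follow the same packing, but operate on the reserved note buffer and a real struct elf_prstatus.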
This patch reads the cpu register data stored in Firmware-assisted dump format (except for crashing cpu) and converts it into elf notes and updates the PT_NOTE program header accordingly. The exact register state for crashing cpu is saved to fadump crash info structure in scratch area during crash_fadump() and read during second kernel boot. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fadump.h | 44 ++++++ arch/powerpc/kernel/fadump.c | 314 ++++++++++++++++++++++++++++++++++++- arch/powerpc/kernel/setup-common.c | 6 + arch/powerpc/kernel/traps.c | 3 + 4 files changed, 365 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 9e172a5d53c..67681958e4b 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -65,6 +65,18 @@ /* Dump status flag */ #define FADUMP_ERROR_FLAG 0x2000 +#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1) + +#define CPU_UNKNOWN (~((u32)0)) + +/* Utility macros */ +#define SKIP_TO_NEXT_CPU(reg_entry) \ +({ \ + while (reg_entry->reg_id != REG_ID("CPUEND")) \ + reg_entry++; \ + reg_entry++; \ +}) + /* Kernel Dump section info */ struct fadump_section { u32 request_flag; @@ -119,6 +131,9 @@ struct fw_dump { unsigned long reserve_bootvar; unsigned long fadumphdr_addr; + unsigned long cpu_notes_buf; + unsigned long cpu_notes_buf_size; + int ibm_configure_kernel_dump; unsigned long fadump_enabled:1; @@ -143,13 +158,40 @@ static inline u64 str_to_u64(const char *str) return val; } #define STR_TO_HEX(x) str_to_u64(x) +#define REG_ID(x) str_to_u64(x) #define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF") +#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE") + +/* The firmware-assisted dump format. + * + * The register save area is an area in the partition's memory used to preserve + * the register contents (CPU state data) for the active CPUs during a firmware + * assisted dump. The dump format contains register save area header followed + * by register entries. Each list of registers for a CPU starts with + * "CPUSTRT" and ends with "CPUEND". + */ + +/* Register save area header. */ +struct fadump_reg_save_area_header { + u64 magic_number; + u32 version; + u32 num_cpu_offset; +}; + +/* Register entry. */ +struct fadump_reg_entry { + u64 reg_id; + u64 reg_value; +}; /* fadump crash info structure */ struct fadump_crash_info_header { u64 magic_number; u64 elfcorehdr_addr; + u32 crashing_cpu; + struct pt_regs regs; + struct cpumask cpu_online_mask; }; /* Crash memory ranges */ @@ -165,7 +207,9 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); +extern void crash_fadump(struct pt_regs *, const char *); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } +static inline void crash_fadump(struct pt_regs *regs, const char *str) { } #endif #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 63857e183de..da68bdad194 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -240,6 +240,7 @@ static unsigned long get_fadump_area_size(void) size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); size += sizeof(struct elfhdr); /* ELF core header.*/ + size += sizeof(struct elf_phdr); /* place holder for cpu notes */ /* Program headers for crash memory regions. 
*/ size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); @@ -393,6 +394,285 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) } } +void crash_fadump(struct pt_regs *regs, const char *str) +{ + struct fadump_crash_info_header *fdh = NULL; + + if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + return; + + fdh = __va(fw_dump.fadumphdr_addr); + crashing_cpu = smp_processor_id(); + fdh->crashing_cpu = crashing_cpu; + crash_save_vmcoreinfo(); + + if (regs) + fdh->regs = *regs; + else + ppc_save_regs(&fdh->regs); + + fdh->cpu_online_mask = *cpu_online_mask; + + /* Call ibm,os-term rtas call to trigger firmware assisted dump */ + rtas_os_term((char *)str); +} + +#define GPR_MASK 0xffffff0000000000 +static inline int fadump_gpr_index(u64 id) +{ + int i = -1; + char str[3]; + + if ((id & GPR_MASK) == REG_ID("GPR")) { + /* get the digits at the end */ + id &= ~GPR_MASK; + id >>= 24; + str[2] = '\0'; + str[1] = id & 0xff; + str[0] = (id >> 8) & 0xff; + sscanf(str, "%d", &i); + if (i > 31) + i = -1; + } + return i; +} + +static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, + u64 reg_val) +{ + int i; + + i = fadump_gpr_index(reg_id); + if (i >= 0) + regs->gpr[i] = (unsigned long)reg_val; + else if (reg_id == REG_ID("NIA")) + regs->nip = (unsigned long)reg_val; + else if (reg_id == REG_ID("MSR")) + regs->msr = (unsigned long)reg_val; + else if (reg_id == REG_ID("CTR")) + regs->ctr = (unsigned long)reg_val; + else if (reg_id == REG_ID("LR")) + regs->link = (unsigned long)reg_val; + else if (reg_id == REG_ID("XER")) + regs->xer = (unsigned long)reg_val; + else if (reg_id == REG_ID("CR")) + regs->ccr = (unsigned long)reg_val; + else if (reg_id == REG_ID("DAR")) + regs->dar = (unsigned long)reg_val; + else if (reg_id == REG_ID("DSISR")) + regs->dsisr = (unsigned long)reg_val; +} + +static struct fadump_reg_entry* +fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + + while (reg_entry->reg_id != REG_ID("CPUEND")) { + fadump_set_regval(regs, reg_entry->reg_id, + reg_entry->reg_value); + reg_entry++; + } + reg_entry++; + return reg_entry; +} + +static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type, + void *data, size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) + 3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void fadump_final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) +{ + struct elf_prstatus prstatus; + + memset(&prstatus, 0, sizeof(prstatus)); + /* + * FIXME: How do i get PID? Do I really need it? + * prstatus.pr_pid = ???? + */ + elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, + &prstatus, sizeof(prstatus)); + return buf; +} + +static void fadump_update_elfcore_header(char *bufp) +{ + struct elfhdr *elf; + struct elf_phdr *phdr; + + elf = (struct elfhdr *)bufp; + bufp += sizeof(struct elfhdr); + + /* First note is a place holder for cpu notes info. 
*/ + phdr = (struct elf_phdr *)bufp; + + if (phdr->p_type == PT_NOTE) { + phdr->p_paddr = fw_dump.cpu_notes_buf; + phdr->p_offset = phdr->p_paddr; + phdr->p_filesz = fw_dump.cpu_notes_buf_size; + phdr->p_memsz = fw_dump.cpu_notes_buf_size; + } + return; +} + +static void *fadump_cpu_notes_buf_alloc(unsigned long size) +{ + void *vaddr; + struct page *page; + unsigned long order, count, i; + + order = get_order(size); + vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order); + if (!vaddr) + return NULL; + + count = 1 << order; + page = virt_to_page(vaddr); + for (i = 0; i < count; i++) + SetPageReserved(page + i); + return vaddr; +} + +static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size) +{ + struct page *page; + unsigned long order, count, i; + + order = get_order(size); + count = 1 << order; + page = virt_to_page(vaddr); + for (i = 0; i < count; i++) + ClearPageReserved(page + i); + __free_pages(page, order); +} + +/* + * Read CPU state dump data and convert it into ELF notes. + * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be + * used to access the data to allow for additional fields to be added without + * affecting compatibility. Each list of registers for a CPU starts with + * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, + * 8 Byte ASCII identifier and 8 Byte register value. The register entry + * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part + * of register value. For more details refer to PAPR document. + * + * Only for the crashing cpu we ignore the CPU dump data and get exact + * state from fadump crash info structure populated by first kernel at the + * time of crash. + */ +static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) +{ + struct fadump_reg_save_area_header *reg_header; + struct fadump_reg_entry *reg_entry; + struct fadump_crash_info_header *fdh = NULL; + void *vaddr; + unsigned long addr; + u32 num_cpus, *note_buf; + struct pt_regs regs; + int i, rc = 0, cpu = 0; + + if (!fdm->cpu_state_data.bytes_dumped) + return -EINVAL; + + addr = fdm->cpu_state_data.destination_address; + vaddr = __va(addr); + + reg_header = vaddr; + if (reg_header->magic_number != REGSAVE_AREA_MAGIC) { + printk(KERN_ERR "Unable to read register save area.\n"); + return -ENOENT; + } + pr_debug("--------CPU State Data------------\n"); + pr_debug("Magic Number: %llx\n", reg_header->magic_number); + pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset); + + vaddr += reg_header->num_cpu_offset; + num_cpus = *((u32 *)(vaddr)); + pr_debug("NumCpus : %u\n", num_cpus); + vaddr += sizeof(u32); + reg_entry = (struct fadump_reg_entry *)vaddr; + + /* Allocate buffer to hold cpu crash notes. 
*/ + fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); + fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); + note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size); + if (!note_buf) { + printk(KERN_ERR "Failed to allocate 0x%lx bytes for " + "cpu notes buffer\n", fw_dump.cpu_notes_buf_size); + return -ENOMEM; + } + fw_dump.cpu_notes_buf = __pa(note_buf); + + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", + (num_cpus * sizeof(note_buf_t)), note_buf); + + if (fw_dump.fadumphdr_addr) + fdh = __va(fw_dump.fadumphdr_addr); + + for (i = 0; i < num_cpus; i++) { + if (reg_entry->reg_id != REG_ID("CPUSTRT")) { + printk(KERN_ERR "Unable to read CPU state data\n"); + rc = -ENOENT; + goto error_out; + } + /* Lower 4 bytes of reg_value contains logical cpu id */ + cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK; + if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { + SKIP_TO_NEXT_CPU(reg_entry); + continue; + } + pr_debug("Reading register data for cpu %d...\n", cpu); + if (fdh && fdh->crashing_cpu == cpu) { + regs = fdh->regs; + note_buf = fadump_regs_to_elf_notes(note_buf, ®s); + SKIP_TO_NEXT_CPU(reg_entry); + } else { + reg_entry++; + reg_entry = fadump_read_registers(reg_entry, ®s); + note_buf = fadump_regs_to_elf_notes(note_buf, ®s); + } + } + fadump_final_note(note_buf); + + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); + return 0; + +error_out: + fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf), + fw_dump.cpu_notes_buf_size); + fw_dump.cpu_notes_buf = 0; + fw_dump.cpu_notes_buf_size = 0; + return rc; + +} + /* * Validate and process the dump data stored by firmware before exporting * it through '/proc/vmcore'. @@ -400,18 +680,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) static int __init process_fadump(const struct fadump_mem_struct *fdm_active) { struct fadump_crash_info_header *fdh; + int rc = 0; if (!fdm_active || !fw_dump.fadumphdr_addr) return -EINVAL; /* Check if the dump data is valid. */ if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || + (fdm_active->cpu_state_data.error_flags != 0) || (fdm_active->rmr_region.error_flags != 0)) { printk(KERN_ERR "Dump taken by platform is not valid\n"); return -EINVAL; } - if (fdm_active->rmr_region.bytes_dumped != - fdm_active->rmr_region.source_len) { + if ((fdm_active->rmr_region.bytes_dumped != + fdm_active->rmr_region.source_len) || + !fdm_active->cpu_state_data.bytes_dumped) { printk(KERN_ERR "Dump taken by platform is incomplete\n"); return -EINVAL; } @@ -423,6 +706,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return -EINVAL; } + rc = fadump_build_cpu_notes(fdm_active); + if (rc) + return rc; + /* * We are done validating dump info and elfcore header is now ready * to be exported. set elfcorehdr_addr so that vmcore module will @@ -537,6 +824,27 @@ static int fadump_create_elfcore_headers(char *bufp) elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); + /* + * setup ELF PT_NOTE, place holder for cpu notes info. The notes info + * will be populated during second kernel boot after crash. Hence + * this PT_NOTE will always be the first elf note. + * + * NOTE: Any new ELF note addition should be placed after this note. 
+ */ + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr->p_type = PT_NOTE; + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + + (elf->e_phnum)++; + /* setup PT_LOAD sections. */ for (i = 0; i < crash_mem_ranges; i++) { @@ -588,6 +896,8 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; fdh->elfcorehdr_addr = addr; + /* We will set the crashing cpu id in crash_fadump() during crash. */ + fdh->crashing_cpu = CPU_UNKNOWN; return addr; } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 77bb77da05c..4e62a56e1a9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -61,6 +61,7 @@ #include #include #include +#include #include "setup.h" @@ -639,6 +640,11 @@ EXPORT_SYMBOL(check_legacy_ioport); static int ppc_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { + /* + * If firmware-assisted dump has been registered then trigger + * firmware-assisted dump and let firmware handle everything else. + */ + crash_fadump(NULL, ptr); ppc_md.panic(ptr); /* May not return */ return NOTIFY_DONE; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c091527efd8..5d40e592ffc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -57,6 +57,7 @@ #include #include #include +#include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, arch_spin_unlock(&die_lock); raw_local_irq_restore(flags); + crash_fadump(regs, "die oops"); + /* * A system reset (0x100) is a request to dump, so we always send * it through the crashdump code. -- cgit v1.2.3-70-g09d2 From b500afff11f64227ca69fd2d05986d08d9573935 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 16 Feb 2012 01:15:08 +0000 Subject: fadump: Invalidate registration and release reserved memory for general use. This patch introduces a sysfs interface '/sys/kernel/fadump_release_mem' to invalidate the last fadump registration, invalidate '/proc/vmcore', release the reserved memory for general use and re-register for a future kernel dump. Once the dump is copied to the disk, unlike phyp dump, the userspace tool can release all the memory reserved for the dump with a single operation: echo 1 to '/sys/kernel/fadump_release_mem'. The reserved memory region is released, excluding the memory required for a future kernel dump registration; therefore, unlike kdump, fadump doesn't need a second reboot to get the system back to its production configuration.
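As a usage sketch only (the destination path, buffer size and minimal error handling below are illustrative assumptions, not part of the interface), the userspace side boils down to saving /proc/vmcore first and then writing '1' to the new sysfs file:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *src = fopen("/proc/vmcore", "rb");
	FILE *dst = fopen("/var/crash/vmcore", "wb");	/* illustrative path */
	FILE *rel;
	static char buf[1 << 16];
	size_t n;

	if (!src || !dst) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Save the dump first; /proc/vmcore disappears once memory is released. */
	while ((n = fread(buf, 1, sizeof(buf), src)) > 0)
		fwrite(buf, 1, n, dst);
	fclose(dst);
	fclose(src);

	/* Invalidate the registration and release the reserved memory. */
	rel = fopen("/sys/kernel/fadump_release_mem", "w");
	if (!rel) {
		perror("fadump_release_mem");
		return EXIT_FAILURE;
	}
	fputs("1", rel);
	fclose(rel);
	return EXIT_SUCCESS;
}

Real dump tooling (the kdump utilities this series is able to reuse) would add filtering and proper error handling around the same two steps.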
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fadump.h | 3 + arch/powerpc/kernel/fadump.c | 158 +++++++++++++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 67681958e4b..88dbf965918 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -208,6 +208,9 @@ extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); extern void crash_fadump(struct pt_regs *, const char *); +extern void fadump_cleanup(void); + +extern void vmcore_cleanup(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index a83bc9015c6..cfe7a38708c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -984,6 +986,132 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm) return 0; } +static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) +{ + int rc = 0; + unsigned int wait_time; + + pr_debug("Invalidating firmware-assisted dump registration\n"); + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, + FADUMP_INVALIDATE, fdm, + sizeof(struct fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + printk(KERN_ERR "Failed to invalidate firmware-assisted dump " + "rgistration. unexpected error(%d).\n", rc); + return rc; + } + fw_dump.dump_active = 0; + fdm_active = NULL; + return 0; +} + +void fadump_cleanup(void) +{ + /* Invalidate the registration only if dump is active. */ + if (fw_dump.dump_active) { + init_fadump_mem_struct(&fdm, + fdm_active->cpu_state_data.destination_address); + fadump_invalidate_dump(&fdm); + } +} + +/* + * Release the memory that was reserved in early boot to preserve the memory + * contents. The released memory will be available for general use. + */ +static void fadump_release_memory(unsigned long begin, unsigned long end) +{ + unsigned long addr; + unsigned long ra_start, ra_end; + + ra_start = fw_dump.reserve_dump_area_start; + ra_end = ra_start + fw_dump.reserve_dump_area_size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + /* + * exclude the dump reserve area. Will reuse it for next + * fadump registration. + */ + if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start)) + continue; + + ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); + init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); + free_page((unsigned long)__va(addr)); + totalram_pages++; + } +} + +static void fadump_invalidate_release_mem(void) +{ + unsigned long reserved_area_start, reserved_area_end; + unsigned long destination_address; + + mutex_lock(&fadump_mutex); + if (!fw_dump.dump_active) { + mutex_unlock(&fadump_mutex); + return; + } + + destination_address = fdm_active->cpu_state_data.destination_address; + fadump_cleanup(); + mutex_unlock(&fadump_mutex); + + /* + * Save the current reserved memory bounds we will require them + * later for releasing the memory for general use. 
+ */ + reserved_area_start = fw_dump.reserve_dump_area_start; + reserved_area_end = reserved_area_start + + fw_dump.reserve_dump_area_size; + /* + * Setup reserve_dump_area_start and its size so that we can + * reuse this reserved memory for Re-registration. + */ + fw_dump.reserve_dump_area_start = destination_address; + fw_dump.reserve_dump_area_size = get_fadump_area_size(); + + fadump_release_memory(reserved_area_start, reserved_area_end); + if (fw_dump.cpu_notes_buf) { + fadump_cpu_notes_buf_free( + (unsigned long)__va(fw_dump.cpu_notes_buf), + fw_dump.cpu_notes_buf_size); + fw_dump.cpu_notes_buf = 0; + fw_dump.cpu_notes_buf_size = 0; + } + /* Initialize the kernel dump memory structure for FAD registration. */ + init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); +} + +static ssize_t fadump_release_memory_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!fw_dump.dump_active) + return -EPERM; + + if (buf[0] == '1') { + /* + * Take away the '/proc/vmcore'. We are releasing the dump + * memory, hence it will not be valid anymore. + */ + vmcore_cleanup(); + fadump_invalidate_release_mem(); + + } else + return -EINVAL; + return count; +} + static ssize_t fadump_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1043,10 +1171,13 @@ static int fadump_region_show(struct seq_file *m, void *private) if (!fw_dump.fadump_enabled) return 0; + mutex_lock(&fadump_mutex); if (fdm_active) fdm_ptr = fdm_active; - else + else { + mutex_unlock(&fadump_mutex); fdm_ptr = &fdm; + } seq_printf(m, "CPU : [%#016llx-%#016llx] %#llx bytes, " @@ -1076,7 +1207,7 @@ static int fadump_region_show(struct seq_file *m, void *private) if (!fdm_active || (fw_dump.reserve_dump_area_start == fdm_ptr->cpu_state_data.destination_address)) - return 0; + goto out; /* Dump is active. Show reserved memory region. */ seq_printf(m, @@ -1088,9 +1219,15 @@ static int fadump_region_show(struct seq_file *m, void *private) fw_dump.reserve_dump_area_start, fdm_ptr->cpu_state_data.destination_address - fw_dump.reserve_dump_area_start); +out: + if (fdm_active) + mutex_unlock(&fadump_mutex); return 0; } +static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, + 0200, NULL, + fadump_release_memory_store); static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, 0444, fadump_enabled_show, NULL); @@ -1131,6 +1268,13 @@ static void fadump_init_files(void) if (!debugfs_file) printk(KERN_ERR "fadump: unable to create debugfs file" " fadump_region\n"); + + if (fw_dump.dump_active) { + rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); + if (rc) + printk(KERN_ERR "fadump: unable to create sysfs file" + " fadump_release_mem (%d)\n", rc); + } return; } @@ -1153,8 +1297,14 @@ int __init setup_fadump(void) * If dump data is available then see if it is valid and prepare for * saving it to the disk. */ - if (fw_dump.dump_active) - process_fadump(fdm_active); + if (fw_dump.dump_active) { + /* + * if dump process fails then invalidate the registration + * and release memory before proceeding for re-registration. + */ + if (process_fadump(fdm_active) < 0) + fadump_invalidate_release_mem(); + } /* Initialize the kernel dump memory structure for FAD registration. 
*/ else if (fw_dump.reserve_dump_area_size) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); -- cgit v1.2.3-70-g09d2 From 12d9299241241200e4f34f3b02f206fa8384a923 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 16 Feb 2012 01:15:23 +0000 Subject: fadump: Remove the phyp assisted dump code. Remove the phyp assisted dump implementation, which is not in use. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- Documentation/powerpc/phyp-assisted-dump.txt | 127 ------- arch/powerpc/Kconfig | 10 - arch/powerpc/include/asm/phyp_dump.h | 47 --- arch/powerpc/kernel/prom.c | 87 ----- arch/powerpc/platforms/pseries/Makefile | 1 - arch/powerpc/platforms/pseries/phyp_dump.c | 513 --------------------------- 6 files changed, 785 deletions(-) delete mode 100644 Documentation/powerpc/phyp-assisted-dump.txt delete mode 100644 arch/powerpc/include/asm/phyp_dump.h delete mode 100644 arch/powerpc/platforms/pseries/phyp_dump.c (limited to 'arch/powerpc/include') diff --git a/Documentation/powerpc/phyp-assisted-dump.txt b/Documentation/powerpc/phyp-assisted-dump.txt deleted file mode 100644 index ad340205d96..00000000000 --- a/Documentation/powerpc/phyp-assisted-dump.txt +++ /dev/null @@ -1,127 +0,0 @@ - - Hypervisor-Assisted Dump - ------------------------ - November 2007 - -The goal of hypervisor-assisted dump is to enable the dump of -a crashed system, and to do so from a fully-reset system, and -to minimize the total elapsed time until the system is back -in production use. - -As compared to kdump or other strategies, hypervisor-assisted -dump offers several strong, practical advantages: - --- Unlike kdump, the system has been reset, and loaded - with a fresh copy of the kernel. In particular, - PCI and I/O devices have been reinitialized and are - in a clean, consistent state. --- As the dump is performed, the dumped memory becomes - immediately available to the system for normal use. --- After the dump is completed, no further reboots are - required; the system will be fully usable, and running - in its normal, production mode on its normal kernel. - -The above can only be accomplished by coordination with, -and assistance from the hypervisor. The procedure is -as follows: - --- When a system crashes, the hypervisor will save - the low 256MB of RAM to a previously registered - save region. It will also save system state, system - registers, and hardware PTE's. - --- After the low 256MB area has been saved, the - hypervisor will reset PCI and other hardware state. - It will *not* clear RAM. It will then launch the - bootloader, as normal. - --- The freshly booted kernel will notice that there - is a new node (ibm,dump-kernel) in the device tree, - indicating that there is crash data available from - a previous boot. It will boot into only 256MB of RAM, - reserving the rest of system memory. - --- Userspace tools will parse /sys/kernel/release_region - and read /proc/vmcore to obtain the contents of memory, - which holds the previous crashed kernel. The userspace - tools may copy this info to disk, or network, nas, san, - iscsi, etc. as desired. - - For Example: the values in /sys/kernel/release-region - would look something like this (address-range pairs). - CPU:0x177fee000-0x10000: HPTE:0x177ffe020-0x1000: / - DUMP:0x177fff020-0x10000000, 0x10000000-0x16F1D370A - --- As the userspace tools complete saving a portion of - dump, they echo an offset and size to - /sys/kernel/release_region to release the reserved - memory back to general use.
- - An example of this is: - "echo 0x40000000 0x10000000 > /sys/kernel/release_region" - which will release 256MB at the 1GB boundary. - -Please note that the hypervisor-assisted dump feature -is only available on Power6-based systems with recent -firmware versions. - -Implementation details: ----------------------- - -During boot, a check is made to see if firmware supports -this feature on this particular machine. If it does, then -we check to see if a active dump is waiting for us. If yes -then everything but 256 MB of RAM is reserved during early -boot. This area is released once we collect a dump from user -land scripts that are run. If there is dump data, then -the /sys/kernel/release_region file is created, and -the reserved memory is held. - -If there is no waiting dump data, then only the highest -256MB of the ram is reserved as a scratch area. This area -is *not* released: this region will be kept permanently -reserved, so that it can act as a receptacle for a copy -of the low 256MB in the case a crash does occur. See, -however, "open issues" below, as to whether -such a reserved region is really needed. - -Currently the dump will be copied from /proc/vmcore to a -a new file upon user intervention. The starting address -to be read and the range for each data point in provided -in /sys/kernel/release_region. - -The tools to examine the dump will be same as the ones -used for kdump. - -General notes: --------------- -Security: please note that there are potential security issues -with any sort of dump mechanism. In particular, plaintext -(unencrypted) data, and possibly passwords, may be present in -the dump data. Userspace tools must take adequate precautions to -preserve security. - -Open issues/ToDo: ------------- - o The various code paths that tell the hypervisor that a crash - occurred, vs. it simply being a normal reboot, should be - reviewed, and possibly clarified/fixed. - - o Instead of using /sys/kernel, should there be a /sys/dump - instead? There is a dump_subsys being created by the s390 code, - perhaps the pseries code should use a similar layout as well. - - o Is reserving a 256MB region really required? The goal of - reserving a 256MB scratch area is to make sure that no - important crash data is clobbered when the hypervisor - save low mem to the scratch area. But, if one could assure - that nothing important is located in some 256MB area, then - it would not need to be reserved. Something that can be - improved in subsequent versions. - - o Still working the kdump team to integrate this with kdump, - some work remains but this would not affect the current - patches. - - o Still need to write a shell script, to copy the dump away. - Currently I am parsing it manually. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index afa4dabfad7..c71311133e6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -376,16 +376,6 @@ config CRASH_DUMP The same kernel binary can be used as production kernel and dump capture kernel. -config PHYP_DUMP - bool "Hypervisor-assisted dump (EXPERIMENTAL)" - depends on PPC_PSERIES && EXPERIMENTAL - help - Hypervisor-assisted dump is meant to be a kdump replacement - offering robustness and speed not possible without system - hypervisor assistance. 
- - If unsure, say "N" - config FA_DUMP bool "Firmware-assisted dump" depends on PPC64 && PPC_RTAS && CRASH_DUMP diff --git a/arch/powerpc/include/asm/phyp_dump.h b/arch/powerpc/include/asm/phyp_dump.h deleted file mode 100644 index fa74c6c3e10..00000000000 --- a/arch/powerpc/include/asm/phyp_dump.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Hypervisor-assisted dump - * - * Linas Vepstas, Manish Ahuja 2008 - * Copyright 2008 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _PPC64_PHYP_DUMP_H -#define _PPC64_PHYP_DUMP_H - -#ifdef CONFIG_PHYP_DUMP - -/* The RMR region will be saved for later dumping - * whenever the kernel crashes. Set this to 256MB. */ -#define PHYP_DUMP_RMR_START 0x0 -#define PHYP_DUMP_RMR_END (1UL<<28) - -struct phyp_dump { - /* Memory that is reserved during very early boot. */ - unsigned long init_reserve_start; - unsigned long init_reserve_size; - /* cmd line options during boot */ - unsigned long reserve_bootvar; - unsigned long phyp_dump_at_boot; - /* Check status during boot if dump supported, active & present*/ - unsigned long phyp_dump_configured; - unsigned long phyp_dump_is_active; - /* store cpu & hpte size */ - unsigned long cpu_state_size; - unsigned long hpte_region_size; - /* previous scratch area values */ - unsigned long reserved_scratch_addr; - unsigned long reserved_scratch_size; -}; - -extern struct phyp_dump *phyp_dump_info; - -int early_init_dt_scan_phyp_dump(unsigned long node, - const char *uname, int depth, void *data); - -#endif /* CONFIG_PHYP_DUMP */ -#endif /* _PPC64_PHYP_DUMP_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 70222b35cfc..89e850af3dd 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -616,86 +615,6 @@ static void __init early_reserve_mem(void) } } -#ifdef CONFIG_PHYP_DUMP -/** - * phyp_dump_calculate_reserve_size() - reserve variable boot area 5% or arg - * - * Function to find the largest size we need to reserve - * during early boot process. - * - * It either looks for boot param and returns that OR - * returns larger of 256 or 5% rounded down to multiples of 256MB. - * - */ -static inline unsigned long phyp_dump_calculate_reserve_size(void) -{ - unsigned long tmp; - - if (phyp_dump_info->reserve_bootvar) - return phyp_dump_info->reserve_bootvar; - - /* divide by 20 to get 5% of value */ - tmp = memblock_end_of_DRAM(); - do_div(tmp, 20); - - /* round it down in multiples of 256 */ - tmp = tmp & ~0x0FFFFFFFUL; - - return (tmp > PHYP_DUMP_RMR_END ? tmp : PHYP_DUMP_RMR_END); -} - -/** - * phyp_dump_reserve_mem() - reserve all not-yet-dumped mmemory - * - * This routine may reserve memory regions in the kernel only - * if the system is supported and a dump was taken in last - * boot instance or if the hardware is supported and the - * scratch area needs to be setup. In other instances it returns - * without reserving anything. The memory in case of dump being - * active is freed when the dump is collected (by userland tools). 
- */ -static void __init phyp_dump_reserve_mem(void) -{ - unsigned long base, size; - unsigned long variable_reserve_size; - - if (!phyp_dump_info->phyp_dump_configured) { - printk(KERN_ERR "Phyp-dump not supported on this hardware\n"); - return; - } - - if (!phyp_dump_info->phyp_dump_at_boot) { - printk(KERN_INFO "Phyp-dump disabled at boot time\n"); - return; - } - - variable_reserve_size = phyp_dump_calculate_reserve_size(); - - if (phyp_dump_info->phyp_dump_is_active) { - /* Reserve *everything* above RMR.Area freed by userland tools*/ - base = variable_reserve_size; - size = memblock_end_of_DRAM() - base; - - /* XXX crashed_ram_end is wrong, since it may be beyond - * the memory_limit, it will need to be adjusted. */ - memblock_reserve(base, size); - - phyp_dump_info->init_reserve_start = base; - phyp_dump_info->init_reserve_size = size; - } else { - size = phyp_dump_info->cpu_state_size + - phyp_dump_info->hpte_region_size + - variable_reserve_size; - base = memblock_end_of_DRAM() - size; - memblock_reserve(base, size); - phyp_dump_info->init_reserve_start = base; - phyp_dump_info->init_reserve_size = size; - } -} -#else -static inline void __init phyp_dump_reserve_mem(void) {} -#endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -715,11 +634,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_opal, NULL); #endif -#ifdef CONFIG_PHYP_DUMP - /* scan tree to see if dump occurred during last boot */ - of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); -#endif - #ifdef CONFIG_FA_DUMP /* scan tree to see if dump is active during last boot */ of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); @@ -765,7 +679,6 @@ void __init early_init_devtree(void *params) #endif reserve_crashkernel(); early_reserve_mem(); - phyp_dump_reserve_mem(); /* * Ensure that total memory size is page-aligned, because otherwise diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 236db46b407..67b3e6e0cf3 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o -obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c deleted file mode 100644 index 6e7742da007..00000000000 --- a/arch/powerpc/platforms/pseries/phyp_dump.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Hypervisor-assisted dump - * - * Linas Vepstas, Manish Ahuja 2008 - * Copyright 2008 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* Variables, used to communicate data between early boot and late boot */ -static struct phyp_dump phyp_dump_vars; -struct phyp_dump *phyp_dump_info = &phyp_dump_vars; - -static int ibm_configure_kernel_dump; -/* ------------------------------------------------- */ -/* RTAS interfaces to declare the dump regions */ - -struct dump_section { - u32 dump_flags; - u16 source_type; - u16 error_flags; - u64 source_address; - u64 source_length; - u64 length_copied; - u64 destination_address; -}; - -struct phyp_dump_header { - u32 version; - u16 num_of_sections; - u16 status; - - u32 first_offset_section; - u32 dump_disk_section; - u64 block_num_dd; - u64 num_of_blocks_dd; - u32 offset_dd; - u32 maxtime_to_auto; - /* No dump disk path string used */ - - struct dump_section cpu_data; - struct dump_section hpte_data; - struct dump_section kernel_data; -}; - -/* The dump header *must be* in low memory, so .bss it */ -static struct phyp_dump_header phdr; - -#define NUM_DUMP_SECTIONS 3 -#define DUMP_HEADER_VERSION 0x1 -#define DUMP_REQUEST_FLAG 0x1 -#define DUMP_SOURCE_CPU 0x0001 -#define DUMP_SOURCE_HPTE 0x0002 -#define DUMP_SOURCE_RMO 0x0011 -#define DUMP_ERROR_FLAG 0x2000 -#define DUMP_TRIGGERED 0x4000 -#define DUMP_PERFORMED 0x8000 - - -/** - * init_dump_header() - initialize the header declaring a dump - * Returns: length of dump save area. - * - * When the hypervisor saves crashed state, it needs to put - * it somewhere. The dump header tells the hypervisor where - * the data can be saved. - */ -static unsigned long init_dump_header(struct phyp_dump_header *ph) -{ - unsigned long addr_offset = 0; - - /* Set up the dump header */ - ph->version = DUMP_HEADER_VERSION; - ph->num_of_sections = NUM_DUMP_SECTIONS; - ph->status = 0; - - ph->first_offset_section = - (u32)offsetof(struct phyp_dump_header, cpu_data); - ph->dump_disk_section = 0; - ph->block_num_dd = 0; - ph->num_of_blocks_dd = 0; - ph->offset_dd = 0; - - ph->maxtime_to_auto = 0; /* disabled */ - - /* The first two sections are mandatory */ - ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG; - ph->cpu_data.source_type = DUMP_SOURCE_CPU; - ph->cpu_data.source_address = 0; - ph->cpu_data.source_length = phyp_dump_info->cpu_state_size; - ph->cpu_data.destination_address = addr_offset; - addr_offset += phyp_dump_info->cpu_state_size; - - ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG; - ph->hpte_data.source_type = DUMP_SOURCE_HPTE; - ph->hpte_data.source_address = 0; - ph->hpte_data.source_length = phyp_dump_info->hpte_region_size; - ph->hpte_data.destination_address = addr_offset; - addr_offset += phyp_dump_info->hpte_region_size; - - /* This section describes the low kernel region */ - ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG; - ph->kernel_data.source_type = DUMP_SOURCE_RMO; - ph->kernel_data.source_address = PHYP_DUMP_RMR_START; - ph->kernel_data.source_length = PHYP_DUMP_RMR_END; - ph->kernel_data.destination_address = addr_offset; - addr_offset += ph->kernel_data.source_length; - - return addr_offset; -} - -static void print_dump_header(const struct phyp_dump_header *ph) -{ -#ifdef DEBUG - if (ph == NULL) - return; - - printk(KERN_INFO "dump header:\n"); - /* setup some ph->sections required */ - printk(KERN_INFO "version = %d\n", ph->version); - printk(KERN_INFO "Sections = %d\n", ph->num_of_sections); - printk(KERN_INFO "Status = 0x%x\n", ph->status); - - /* No ph->disk, so all should be set to 0 
*/ - printk(KERN_INFO "Offset to first section 0x%x\n", - ph->first_offset_section); - printk(KERN_INFO "dump disk sections should be zero\n"); - printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section); - printk(KERN_INFO "block num = %lld\n", ph->block_num_dd); - printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd); - printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd); - printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto); - - /*set cpu state and hpte states as well scratch pad area */ - printk(KERN_INFO " CPU AREA\n"); - printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags); - printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type); - printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags); - printk(KERN_INFO "cpu source_address =%llx\n", - ph->cpu_data.source_address); - printk(KERN_INFO "cpu source_length =%llx\n", - ph->cpu_data.source_length); - printk(KERN_INFO "cpu length_copied =%llx\n", - ph->cpu_data.length_copied); - - printk(KERN_INFO " HPTE AREA\n"); - printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags); - printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type); - printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags); - printk(KERN_INFO "HPTE source_address =%llx\n", - ph->hpte_data.source_address); - printk(KERN_INFO "HPTE source_length =%llx\n", - ph->hpte_data.source_length); - printk(KERN_INFO "HPTE length_copied =%llx\n", - ph->hpte_data.length_copied); - - printk(KERN_INFO " SRSD AREA\n"); - printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags); - printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type); - printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags); - printk(KERN_INFO "SRSD source_address =%llx\n", - ph->kernel_data.source_address); - printk(KERN_INFO "SRSD source_length =%llx\n", - ph->kernel_data.source_length); - printk(KERN_INFO "SRSD length_copied =%llx\n", - ph->kernel_data.length_copied); -#endif -} - -static ssize_t show_phyp_dump_active(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - - /* create filesystem entry so kdump is phyp-dump aware */ - return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot); -} - -static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, - show_phyp_dump_active, - NULL); - -static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr) -{ - int rc; - - /* Add addr value if not initialized before */ - if (ph->cpu_data.destination_address == 0) { - ph->cpu_data.destination_address += addr; - ph->hpte_data.destination_address += addr; - ph->kernel_data.destination_address += addr; - } - - /* ToDo Invalidate kdump and free memory range. 
*/ - - do { - rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, - 1, ph, sizeof(struct phyp_dump_header)); - } while (rtas_busy_delay(rc)); - - if (rc) { - printk(KERN_ERR "phyp-dump: unexpected error (%d) on " - "register\n", rc); - print_dump_header(ph); - return; - } - - rc = sysfs_create_file(kernel_kobj, &pdl.attr); - if (rc) - printk(KERN_ERR "phyp-dump: unable to create sysfs" - " file (%d)\n", rc); -} - -static -void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) -{ - int rc; - - /* Add addr value if not initialized before */ - if (ph->cpu_data.destination_address == 0) { - ph->cpu_data.destination_address += addr; - ph->hpte_data.destination_address += addr; - ph->kernel_data.destination_address += addr; - } - - do { - rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, - 2, ph, sizeof(struct phyp_dump_header)); - } while (rtas_busy_delay(rc)); - - if (rc) { - printk(KERN_ERR "phyp-dump: unexpected error (%d) " - "on invalidate\n", rc); - print_dump_header(ph); - } -} - -/* ------------------------------------------------- */ -/** - * release_memory_range -- release memory previously memblock_reserved - * @start_pfn: starting physical frame number - * @nr_pages: number of pages to free. - * - * This routine will release memory that had been previously - * memblock_reserved in early boot. The released memory becomes - * available for genreal use. - */ -static void release_memory_range(unsigned long start_pfn, - unsigned long nr_pages) -{ - struct page *rpage; - unsigned long end_pfn; - long i; - - end_pfn = start_pfn + nr_pages; - - for (i = start_pfn; i <= end_pfn; i++) { - rpage = pfn_to_page(i); - if (PageReserved(rpage)) { - ClearPageReserved(rpage); - init_page_count(rpage); - __free_page(rpage); - totalram_pages++; - } - } -} - -/** - * track_freed_range -- Counts the range being freed. - * Once the counter goes to zero, it re-registers dump for - * future use. - */ -static void -track_freed_range(unsigned long addr, unsigned long length) -{ - static unsigned long scratch_area_size, reserved_area_size; - - if (addr < phyp_dump_info->init_reserve_start) - return; - - if ((addr >= phyp_dump_info->init_reserve_start) && - (addr <= phyp_dump_info->init_reserve_start + - phyp_dump_info->init_reserve_size)) - reserved_area_size += length; - - if ((addr >= phyp_dump_info->reserved_scratch_addr) && - (addr <= phyp_dump_info->reserved_scratch_addr + - phyp_dump_info->reserved_scratch_size)) - scratch_area_size += length; - - if ((reserved_area_size == phyp_dump_info->init_reserve_size) && - (scratch_area_size == phyp_dump_info->reserved_scratch_size)) { - - invalidate_last_dump(&phdr, - phyp_dump_info->reserved_scratch_addr); - register_dump_area(&phdr, - phyp_dump_info->reserved_scratch_addr); - } -} - -/* ------------------------------------------------- */ -/** - * sysfs_release_region -- sysfs interface to release memory range. - * - * Usage: - * "echo > /sys/kernel/release_region" - * - * Example: - * "echo 0x40000000 0x10000000 > /sys/kernel/release_region" - * - * will release 256MB starting at 1GB. 
- */ -static ssize_t store_release_region(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - unsigned long start_addr, length, end_addr; - unsigned long start_pfn, nr_pages; - ssize_t ret; - - ret = sscanf(buf, "%lx %lx", &start_addr, &length); - if (ret != 2) - return -EINVAL; - - track_freed_range(start_addr, length); - - /* Range-check - don't free any reserved memory that - * wasn't reserved for phyp-dump */ - if (start_addr < phyp_dump_info->init_reserve_start) - start_addr = phyp_dump_info->init_reserve_start; - - end_addr = phyp_dump_info->init_reserve_start + - phyp_dump_info->init_reserve_size; - if (start_addr+length > end_addr) - length = end_addr - start_addr; - - /* Release the region of memory assed in by user */ - start_pfn = PFN_DOWN(start_addr); - nr_pages = PFN_DOWN(length); - release_memory_range(start_pfn, nr_pages); - - return count; -} - -static ssize_t show_release_region(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - u64 second_addr_range; - - /* total reserved size - start of scratch area */ - second_addr_range = phyp_dump_info->init_reserve_size - - phyp_dump_info->reserved_scratch_size; - return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:" - " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n", - phdr.cpu_data.destination_address, - phdr.cpu_data.length_copied, - phdr.hpte_data.destination_address, - phdr.hpte_data.length_copied, - phdr.kernel_data.destination_address, - phdr.kernel_data.length_copied, - phyp_dump_info->init_reserve_start, - second_addr_range); -} - -static struct kobj_attribute rr = __ATTR(release_region, 0600, - show_release_region, - store_release_region); - -static int __init phyp_dump_setup(void) -{ - struct device_node *rtas; - const struct phyp_dump_header *dump_header = NULL; - unsigned long dump_area_start; - unsigned long dump_area_length; - int header_len = 0; - int rc; - - /* If no memory was reserved in early boot, there is nothing to do */ - if (phyp_dump_info->init_reserve_size == 0) - return 0; - - /* Return if phyp dump not supported */ - if (!phyp_dump_info->phyp_dump_configured) - return -ENOSYS; - - /* Is there dump data waiting for us? If there isn't, - * then register a new dump area, and release all of - * the rest of the reserved ram. - * - * The /rtas/ibm,kernel-dump rtas node is present only - * if there is dump data waiting for us. - */ - rtas = of_find_node_by_path("/rtas"); - if (rtas) { - dump_header = of_get_property(rtas, "ibm,kernel-dump", - &header_len); - of_node_put(rtas); - } - - ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump"); - - print_dump_header(dump_header); - dump_area_length = init_dump_header(&phdr); - /* align down */ - dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK; - - if (dump_header == NULL) { - register_dump_area(&phdr, dump_area_start); - return 0; - } - - /* re-register the dump area, if old dump was invalid */ - if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) { - invalidate_last_dump(&phdr, dump_area_start); - register_dump_area(&phdr, dump_area_start); - return 0; - } - - if (dump_header) { - phyp_dump_info->reserved_scratch_addr = - dump_header->cpu_data.destination_address; - phyp_dump_info->reserved_scratch_size = - dump_header->cpu_data.source_length + - dump_header->hpte_data.source_length + - dump_header->kernel_data.source_length; - } - - /* Should we create a dump_subsys, analogous to s390/ipl.c ? 
*/ - rc = sysfs_create_file(kernel_kobj, &rr.attr); - if (rc) - printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n", - rc); - - /* ToDo: re-register the dump area, for next time. */ - return 0; -} -machine_subsys_initcall(pseries, phyp_dump_setup); - -int __init early_init_dt_scan_phyp_dump(unsigned long node, - const char *uname, int depth, void *data) -{ - const unsigned int *sizes; - - phyp_dump_info->phyp_dump_configured = 0; - phyp_dump_info->phyp_dump_is_active = 0; - - if (depth != 1 || strcmp(uname, "rtas") != 0) - return 0; - - if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL)) - phyp_dump_info->phyp_dump_configured++; - - if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL)) - phyp_dump_info->phyp_dump_is_active++; - - sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", - NULL); - if (!sizes) - return 0; - - if (sizes[0] == 1) - phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]); - - if (sizes[3] == 2) - phyp_dump_info->hpte_region_size = - *((unsigned long *)&sizes[4]); - return 1; -} - -/* Look for phyp_dump= cmdline option */ -static int __init early_phyp_dump_enabled(char *p) -{ - phyp_dump_info->phyp_dump_at_boot = 1; - - if (!p) - return 0; - - if (strncmp(p, "1", 1) == 0) - phyp_dump_info->phyp_dump_at_boot = 1; - else if (strncmp(p, "0", 1) == 0) - phyp_dump_info->phyp_dump_at_boot = 0; - - return 0; -} -early_param("phyp_dump", early_phyp_dump_enabled); - -/* Look for phyp_dump_reserve_size= cmdline option */ -static int __init early_phyp_dump_reserve_size(char *p) -{ - if (p) - phyp_dump_info->reserve_bootvar = memparse(p, &p); - - return 0; -} -early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size); -- cgit v1.2.3-70-g09d2 From a6cf7ed5119fb22f54584a9f867b638edd3c4384 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 29 Feb 2012 21:12:16 +0000 Subject: powerpc/atomic: Implement atomic*_inc_not_zero Implement atomic_inc_not_zero and atomic64_inc_not_zero. At the moment we use atomic*_add_unless which requires us to put 0 and 1 constants into registers. We can also avoid a subtract by saving the original value in a second temporary. This removes 3 instructions from fget: - c0000000001b63c0: 39 00 00 00 li r8,0 - c0000000001b63c4: 39 40 00 01 li r10,1 ... - c0000000001b63e8: 7c 0a 00 50 subf r0,r10,r0 Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/atomic.h | 59 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 02e41b53488..14174e838ad 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -212,6 +212,36 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) return t; } +/** + * atomic_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1, so long as @v is non-zero. + * Returns non-zero if @v was non-zero, and zero otherwise. + */ +static __inline__ int atomic_inc_not_zero(atomic_t *v) +{ + int t1, t2; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\ + cmpwi 0,%0,0\n\ + beq- 2f\n\ + addic %1,%0,1\n" + PPC405_ERR77(0,%2) +" stwcx. 
%1,0,%2\n\ + bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (t1), "=&r" (t2) + : "r" (&v->counter) + : "cc", "xer", "memory"); + + return t1; +} +#define atomic_inc_not_zero(v) atomic_inc_not_zero((v)) #define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0) #define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0) @@ -467,7 +497,34 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) return t != u; } -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +/** + * atomic_inc64_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, so long as @v is non-zero. + * Returns non-zero if @v was non-zero, and zero otherwise. + */ +static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +{ + long t1, t2; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: ldarx %0,0,%2 # atomic64_inc_not_zero\n\ + cmpdi 0,%0,0\n\ + beq- 2f\n\ + addic %1,%0,1\n\ + stdcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (t1), "=&r" (t2) + : "r" (&v->counter) + : "cc", "xer", "memory"); + + return t1; +} #endif /* __powerpc64__ */ -- cgit v1.2.3-70-g09d2 From b0787660260604ba63621881851de0032279819b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 7 Mar 2012 18:43:10 +0000 Subject: powerpc: clean up vio.c This cleans up vio.c after the removal of the legacy iSeries platform. It also removes some no longer referenced include files. Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iseries/hv_call_event.h | 201 ----------------- arch/powerpc/include/asm/iseries/hv_lp_event.h | 162 -------------- arch/powerpc/include/asm/iseries/iommu.h | 37 ---- arch/powerpc/include/asm/iseries/vio.h | 265 ----------------------- arch/powerpc/kernel/vio.c | 18 +- 5 files changed, 1 insertion(+), 682 deletions(-) delete mode 100644 arch/powerpc/include/asm/iseries/hv_call_event.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_lp_event.h delete mode 100644 arch/powerpc/include/asm/iseries/iommu.h delete mode 100644 arch/powerpc/include/asm/iseries/vio.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/iseries/hv_call_event.h b/arch/powerpc/include/asm/iseries/hv_call_event.h deleted file mode 100644 index cc029d388e1..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_call_event.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This file contains the "hypervisor call" interface which is used to - * drive the hypervisor from the OS. 
- */ -#ifndef _ASM_POWERPC_ISERIES_HV_CALL_EVENT_H -#define _ASM_POWERPC_ISERIES_HV_CALL_EVENT_H - -#include -#include - -#include -#include -#include - -struct HvLpEvent; - -typedef u8 HvLpEvent_Type; -typedef u8 HvLpEvent_AckInd; -typedef u8 HvLpEvent_AckType; - -typedef u8 HvLpDma_Direction; -typedef u8 HvLpDma_AddressType; - -typedef u64 HvLpEvent_Rc; -typedef u64 HvLpDma_Rc; - -#define HvCallEventAckLpEvent HvCallEvent + 0 -#define HvCallEventCancelLpEvent HvCallEvent + 1 -#define HvCallEventCloseLpEventPath HvCallEvent + 2 -#define HvCallEventDmaBufList HvCallEvent + 3 -#define HvCallEventDmaSingle HvCallEvent + 4 -#define HvCallEventDmaToSp HvCallEvent + 5 -#define HvCallEventGetOverflowLpEvents HvCallEvent + 6 -#define HvCallEventGetSourceLpInstanceId HvCallEvent + 7 -#define HvCallEventGetTargetLpInstanceId HvCallEvent + 8 -#define HvCallEventOpenLpEventPath HvCallEvent + 9 -#define HvCallEventSetLpEventStack HvCallEvent + 10 -#define HvCallEventSignalLpEvent HvCallEvent + 11 -#define HvCallEventSignalLpEventParms HvCallEvent + 12 -#define HvCallEventSetInterLpQueueIndex HvCallEvent + 13 -#define HvCallEventSetLpEventQueueInterruptProc HvCallEvent + 14 -#define HvCallEventRouter15 HvCallEvent + 15 - -static inline void HvCallEvent_getOverflowLpEvents(u8 queueIndex) -{ - HvCall1(HvCallEventGetOverflowLpEvents, queueIndex); -} - -static inline void HvCallEvent_setInterLpQueueIndex(u8 queueIndex) -{ - HvCall1(HvCallEventSetInterLpQueueIndex, queueIndex); -} - -static inline void HvCallEvent_setLpEventStack(u8 queueIndex, - char *eventStackAddr, u32 eventStackSize) -{ - HvCall3(HvCallEventSetLpEventStack, queueIndex, - virt_to_abs(eventStackAddr), eventStackSize); -} - -static inline void HvCallEvent_setLpEventQueueInterruptProc(u8 queueIndex, - u16 lpLogicalProcIndex) -{ - HvCall2(HvCallEventSetLpEventQueueInterruptProc, queueIndex, - lpLogicalProcIndex); -} - -static inline HvLpEvent_Rc HvCallEvent_signalLpEvent(struct HvLpEvent *event) -{ - return HvCall1(HvCallEventSignalLpEvent, virt_to_abs(event)); -} - -static inline HvLpEvent_Rc HvCallEvent_signalLpEventFast(HvLpIndex targetLp, - HvLpEvent_Type type, u16 subtype, HvLpEvent_AckInd ackInd, - HvLpEvent_AckType ackType, HvLpInstanceId sourceInstanceId, - HvLpInstanceId targetInstanceId, u64 correlationToken, - u64 eventData1, u64 eventData2, u64 eventData3, - u64 eventData4, u64 eventData5) -{ - /* Pack the misc bits into a single Dword to pass to PLIC */ - union { - struct { - u8 ack_and_target; - u8 type; - u16 subtype; - HvLpInstanceId src_inst; - HvLpInstanceId target_inst; - } parms; - u64 dword; - } packed; - - packed.parms.ack_and_target = (ackType << 7) | (ackInd << 6) | targetLp; - packed.parms.type = type; - packed.parms.subtype = subtype; - packed.parms.src_inst = sourceInstanceId; - packed.parms.target_inst = targetInstanceId; - - return HvCall7(HvCallEventSignalLpEventParms, packed.dword, - correlationToken, eventData1, eventData2, - eventData3, eventData4, eventData5); -} - -extern void *iseries_hv_alloc(size_t size, dma_addr_t *dma_handle, gfp_t flag); -extern void iseries_hv_free(size_t size, void *vaddr, dma_addr_t dma_handle); -extern dma_addr_t iseries_hv_map(void *vaddr, size_t size, - enum dma_data_direction direction); -extern void iseries_hv_unmap(dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - -static inline HvLpEvent_Rc HvCallEvent_ackLpEvent(struct HvLpEvent *event) -{ - return HvCall1(HvCallEventAckLpEvent, virt_to_abs(event)); -} - -static inline HvLpEvent_Rc 
HvCallEvent_cancelLpEvent(struct HvLpEvent *event) -{ - return HvCall1(HvCallEventCancelLpEvent, virt_to_abs(event)); -} - -static inline HvLpInstanceId HvCallEvent_getSourceLpInstanceId( - HvLpIndex targetLp, HvLpEvent_Type type) -{ - return HvCall2(HvCallEventGetSourceLpInstanceId, targetLp, type); -} - -static inline HvLpInstanceId HvCallEvent_getTargetLpInstanceId( - HvLpIndex targetLp, HvLpEvent_Type type) -{ - return HvCall2(HvCallEventGetTargetLpInstanceId, targetLp, type); -} - -static inline void HvCallEvent_openLpEventPath(HvLpIndex targetLp, - HvLpEvent_Type type) -{ - HvCall2(HvCallEventOpenLpEventPath, targetLp, type); -} - -static inline void HvCallEvent_closeLpEventPath(HvLpIndex targetLp, - HvLpEvent_Type type) -{ - HvCall2(HvCallEventCloseLpEventPath, targetLp, type); -} - -static inline HvLpDma_Rc HvCallEvent_dmaBufList(HvLpEvent_Type type, - HvLpIndex remoteLp, HvLpDma_Direction direction, - HvLpInstanceId localInstanceId, - HvLpInstanceId remoteInstanceId, - HvLpDma_AddressType localAddressType, - HvLpDma_AddressType remoteAddressType, - /* Do these need to be converted to absolute addresses? */ - u64 localBufList, u64 remoteBufList, u32 transferLength) -{ - /* Pack the misc bits into a single Dword to pass to PLIC */ - union { - struct { - u8 flags; - HvLpIndex remote; - u8 type; - u8 reserved; - HvLpInstanceId local_inst; - HvLpInstanceId remote_inst; - } parms; - u64 dword; - } packed; - - packed.parms.flags = (direction << 7) | - (localAddressType << 6) | (remoteAddressType << 5); - packed.parms.remote = remoteLp; - packed.parms.type = type; - packed.parms.reserved = 0; - packed.parms.local_inst = localInstanceId; - packed.parms.remote_inst = remoteInstanceId; - - return HvCall4(HvCallEventDmaBufList, packed.dword, localBufList, - remoteBufList, transferLength); -} - -static inline HvLpDma_Rc HvCallEvent_dmaToSp(void *local, u32 remote, - u32 length, HvLpDma_Direction dir) -{ - return HvCall4(HvCallEventDmaToSp, virt_to_abs(local), remote, - length, dir); -} - -#endif /* _ASM_POWERPC_ISERIES_HV_CALL_EVENT_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_lp_event.h b/arch/powerpc/include/asm/iseries/hv_lp_event.h deleted file mode 100644 index 8f5da7d7720..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_lp_event.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* This file contains the class for HV events in the system. */ - -#ifndef _ASM_POWERPC_ISERIES_HV_LP_EVENT_H -#define _ASM_POWERPC_ISERIES_HV_LP_EVENT_H - -#include -#include -#include -#include - -/* - * HvLpEvent is the structure for Lp Event messages passed between - * partitions through PLIC. 
- */ - -struct HvLpEvent { - u8 flags; /* Event flags x00-x00 */ - u8 xType; /* Type of message x01-x01 */ - u16 xSubtype; /* Subtype for event x02-x03 */ - u8 xSourceLp; /* Source LP x04-x04 */ - u8 xTargetLp; /* Target LP x05-x05 */ - u8 xSizeMinus1; /* Size of Derived class - 1 x06-x06 */ - u8 xRc; /* RC for Ack flows x07-x07 */ - u16 xSourceInstanceId; /* Source sides instance id x08-x09 */ - u16 xTargetInstanceId; /* Target sides instance id x0A-x0B */ - union { - u32 xSubtypeData; /* Data usable by the subtype x0C-x0F */ - u16 xSubtypeDataShort[2]; /* Data as 2 shorts */ - u8 xSubtypeDataChar[4]; /* Data as 4 chars */ - } x; - - u64 xCorrelationToken; /* Unique value for source/type x10-x17 */ -}; - -typedef void (*LpEventHandler)(struct HvLpEvent *); - -/* Register a handler for an event type - returns 0 on success */ -extern int HvLpEvent_registerHandler(HvLpEvent_Type eventType, - LpEventHandler hdlr); - -/* - * Unregister a handler for an event type - * - * This call will sleep until the handler being removed is guaranteed to - * be no longer executing on any CPU. Do not call with locks held. - * - * returns 0 on success - * Unregister will fail if there are any paths open for the type - */ -extern int HvLpEvent_unregisterHandler(HvLpEvent_Type eventType); - -/* - * Open an Lp Event Path for an event type - * returns 0 on success - * openPath will fail if there is no handler registered for the event type. - * The lpIndex specified is the partition index for the target partition - * (for VirtualIo, VirtualLan and SessionMgr) other types specify zero) - */ -extern int HvLpEvent_openPath(HvLpEvent_Type eventType, HvLpIndex lpIndex); - -/* - * Close an Lp Event Path for a type and partition - * returns 0 on success - */ -extern int HvLpEvent_closePath(HvLpEvent_Type eventType, HvLpIndex lpIndex); - -#define HvLpEvent_Type_Hypervisor 0 -#define HvLpEvent_Type_MachineFac 1 -#define HvLpEvent_Type_SessionMgr 2 -#define HvLpEvent_Type_SpdIo 3 -#define HvLpEvent_Type_VirtualBus 4 -#define HvLpEvent_Type_PciIo 5 -#define HvLpEvent_Type_RioIo 6 -#define HvLpEvent_Type_VirtualLan 7 -#define HvLpEvent_Type_VirtualIo 8 -#define HvLpEvent_Type_NumTypes 9 - -#define HvLpEvent_Rc_Good 0 -#define HvLpEvent_Rc_BufferNotAvailable 1 -#define HvLpEvent_Rc_Cancelled 2 -#define HvLpEvent_Rc_GenericError 3 -#define HvLpEvent_Rc_InvalidAddress 4 -#define HvLpEvent_Rc_InvalidPartition 5 -#define HvLpEvent_Rc_InvalidSize 6 -#define HvLpEvent_Rc_InvalidSubtype 7 -#define HvLpEvent_Rc_InvalidSubtypeData 8 -#define HvLpEvent_Rc_InvalidType 9 -#define HvLpEvent_Rc_PartitionDead 10 -#define HvLpEvent_Rc_PathClosed 11 -#define HvLpEvent_Rc_SubtypeError 12 - -#define HvLpEvent_Function_Ack 0 -#define HvLpEvent_Function_Int 1 - -#define HvLpEvent_AckInd_NoAck 0 -#define HvLpEvent_AckInd_DoAck 1 - -#define HvLpEvent_AckType_ImmediateAck 0 -#define HvLpEvent_AckType_DeferredAck 1 - -#define HV_LP_EVENT_INT 0x01 -#define HV_LP_EVENT_DO_ACK 0x02 -#define HV_LP_EVENT_DEFERRED_ACK 0x04 -#define HV_LP_EVENT_VALID 0x80 - -#define HvLpDma_Direction_LocalToRemote 0 -#define HvLpDma_Direction_RemoteToLocal 1 - -#define HvLpDma_AddressType_TceIndex 0 -#define HvLpDma_AddressType_RealAddress 1 - -#define HvLpDma_Rc_Good 0 -#define HvLpDma_Rc_Error 1 -#define HvLpDma_Rc_PartitionDead 2 -#define HvLpDma_Rc_PathClosed 3 -#define HvLpDma_Rc_InvalidAddress 4 -#define HvLpDma_Rc_InvalidLength 5 - -static inline int hvlpevent_is_valid(struct HvLpEvent *h) -{ - return h->flags & HV_LP_EVENT_VALID; -} - -static inline void 
hvlpevent_invalidate(struct HvLpEvent *h) -{ - h->flags &= ~ HV_LP_EVENT_VALID; -} - -static inline int hvlpevent_is_int(struct HvLpEvent *h) -{ - return h->flags & HV_LP_EVENT_INT; -} - -static inline int hvlpevent_is_ack(struct HvLpEvent *h) -{ - return !hvlpevent_is_int(h); -} - -static inline int hvlpevent_need_ack(struct HvLpEvent *h) -{ - return h->flags & HV_LP_EVENT_DO_ACK; -} - -#endif /* _ASM_POWERPC_ISERIES_HV_LP_EVENT_H */ diff --git a/arch/powerpc/include/asm/iseries/iommu.h b/arch/powerpc/include/asm/iseries/iommu.h deleted file mode 100644 index 1b9692c6089..00000000000 --- a/arch/powerpc/include/asm/iseries/iommu.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _ASM_POWERPC_ISERIES_IOMMU_H -#define _ASM_POWERPC_ISERIES_IOMMU_H - -/* - * Copyright (C) 2005 Stephen Rothwell, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the: - * Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - */ - -struct pci_dev; -struct vio_dev; -struct device_node; -struct iommu_table; - -/* Get table parameters from HV */ -extern void iommu_table_getparms_iSeries(unsigned long busno, - unsigned char slotno, unsigned char virtbus, - struct iommu_table *tbl); - -extern struct iommu_table *vio_build_iommu_table_iseries(struct vio_dev *dev); -extern void iommu_vio_init(void); - -#endif /* _ASM_POWERPC_ISERIES_IOMMU_H */ diff --git a/arch/powerpc/include/asm/iseries/vio.h b/arch/powerpc/include/asm/iseries/vio.h deleted file mode 100644 index f9ac0d00b95..00000000000 --- a/arch/powerpc/include/asm/iseries/vio.h +++ /dev/null @@ -1,265 +0,0 @@ -/* -*- linux-c -*- - * - * iSeries Virtual I/O Message Path header - * - * Authors: Dave Boutcher - * Ryan Arnold - * Colin Devilbiss - * - * (C) Copyright 2000 IBM Corporation - * - * This header file is used by the iSeries virtual I/O device - * drivers. It defines the interfaces to the common functions - * (implemented in drivers/char/viopath.h) as well as defining - * common functions and structures. Currently (at the time I - * wrote this comment) the iSeries virtual I/O device drivers - * that use this are - * drivers/block/viodasd.c - * drivers/char/viocons.c - * drivers/char/viotape.c - * drivers/cdrom/viocd.c - * - * The iSeries virtual ethernet support (veth.c) uses a whole - * different set of functions. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) anyu later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ -#ifndef _ASM_POWERPC_ISERIES_VIO_H -#define _ASM_POWERPC_ISERIES_VIO_H - -#include -#include - -/* - * iSeries virtual I/O events use the subtype field in - * HvLpEvent to figure out what kind of vio event is coming - * in. We use a table to route these, and this defines - * the maximum number of distinct subtypes - */ -#define VIO_MAX_SUBTYPES 8 - -#define VIOMAXBLOCKDMA 12 - -struct open_data { - u64 disk_size; - u16 max_disk; - u16 cylinders; - u16 tracks; - u16 sectors; - u16 bytes_per_sector; -}; - -struct rw_data { - u64 offset; - struct { - u32 token; - u32 reserved; - u64 len; - } dma_info[VIOMAXBLOCKDMA]; -}; - -struct vioblocklpevent { - struct HvLpEvent event; - u32 reserved; - u16 version; - u16 sub_result; - u16 disk; - u16 flags; - union { - struct open_data open_data; - struct rw_data rw_data; - u64 changed; - } u; -}; - -#define vioblockflags_ro 0x0001 - -enum vioblocksubtype { - vioblockopen = 0x0001, - vioblockclose = 0x0002, - vioblockread = 0x0003, - vioblockwrite = 0x0004, - vioblockflush = 0x0005, - vioblockcheck = 0x0007 -}; - -struct viocdlpevent { - struct HvLpEvent event; - u32 reserved; - u16 version; - u16 sub_result; - u16 disk; - u16 flags; - u32 token; - u64 offset; /* On open, max number of disks */ - u64 len; /* On open, size of the disk */ - u32 block_size; /* Only set on open */ - u32 media_size; /* Only set on open */ -}; - -enum viocdsubtype { - viocdopen = 0x0001, - viocdclose = 0x0002, - viocdread = 0x0003, - viocdwrite = 0x0004, - viocdlockdoor = 0x0005, - viocdgetinfo = 0x0006, - viocdcheck = 0x0007 -}; - -struct viotapelpevent { - struct HvLpEvent event; - u32 reserved; - u16 version; - u16 sub_type_result; - u16 tape; - u16 flags; - u32 token; - u64 len; - union { - struct { - u32 tape_op; - u32 count; - } op; - struct { - u32 type; - u32 resid; - u32 dsreg; - u32 gstat; - u32 erreg; - u32 file_no; - u32 block_no; - } get_status; - struct { - u32 block_no; - } get_pos; - } u; -}; - -enum viotapesubtype { - viotapeopen = 0x0001, - viotapeclose = 0x0002, - viotaperead = 0x0003, - viotapewrite = 0x0004, - viotapegetinfo = 0x0005, - viotapeop = 0x0006, - viotapegetpos = 0x0007, - viotapesetpos = 0x0008, - viotapegetstatus = 0x0009 -}; - -/* - * Each subtype can register a handler to process their events. - * The handler must have this interface. 
- */ -typedef void (vio_event_handler_t) (struct HvLpEvent * event); - -extern int viopath_open(HvLpIndex remoteLp, int subtype, int numReq); -extern int viopath_close(HvLpIndex remoteLp, int subtype, int numReq); -extern int vio_setHandler(int subtype, vio_event_handler_t * beh); -extern int vio_clearHandler(int subtype); -extern int viopath_isactive(HvLpIndex lp); -extern HvLpInstanceId viopath_sourceinst(HvLpIndex lp); -extern HvLpInstanceId viopath_targetinst(HvLpIndex lp); -extern void vio_set_hostlp(void); -extern void *vio_get_event_buffer(int subtype); -extern void vio_free_event_buffer(int subtype, void *buffer); - -extern struct vio_dev *vio_create_viodasd(u32 unit); - -extern HvLpIndex viopath_hostLp; -extern HvLpIndex viopath_ourLp; - -#define VIOCHAR_MAX_DATA 200 - -#define VIOMAJOR_SUBTYPE_MASK 0xff00 -#define VIOMINOR_SUBTYPE_MASK 0x00ff -#define VIOMAJOR_SUBTYPE_SHIFT 8 - -#define VIOVERSION 0x0101 - -/* - * This is the general structure for VIO errors; each module should have - * a table of them, and each table should be terminated by an entry of - * { 0, 0, NULL }. Then, to find a specific error message, a module - * should pass its local table and the return code. - */ -struct vio_error_entry { - u16 rc; - int errno; - const char *msg; -}; -extern const struct vio_error_entry *vio_lookup_rc( - const struct vio_error_entry *local_table, u16 rc); - -enum viosubtypes { - viomajorsubtype_monitor = 0x0100, - viomajorsubtype_blockio = 0x0200, - viomajorsubtype_chario = 0x0300, - viomajorsubtype_config = 0x0400, - viomajorsubtype_cdio = 0x0500, - viomajorsubtype_tape = 0x0600, - viomajorsubtype_scsi = 0x0700 -}; - -enum vioconfigsubtype { - vioconfigget = 0x0001, -}; - -enum viorc { - viorc_good = 0x0000, - viorc_noConnection = 0x0001, - viorc_noReceiver = 0x0002, - viorc_noBufferAvailable = 0x0003, - viorc_invalidMessageType = 0x0004, - viorc_invalidRange = 0x0201, - viorc_invalidToken = 0x0202, - viorc_DMAError = 0x0203, - viorc_useError = 0x0204, - viorc_releaseError = 0x0205, - viorc_invalidDisk = 0x0206, - viorc_openRejected = 0x0301 -}; - -/* - * The structure of the events that flow between us and OS/400 for chario - * events. You can't mess with this unless the OS/400 side changes too. 
- */ -struct viocharlpevent { - struct HvLpEvent event; - u32 reserved; - u16 version; - u16 subtype_result_code; - u8 virtual_device; - u8 len; - u8 data[VIOCHAR_MAX_DATA]; -}; - -#define VIOCHAR_WINDOW 10 - -enum viocharsubtype { - viocharopen = 0x0001, - viocharclose = 0x0002, - viochardata = 0x0003, - viocharack = 0x0004, - viocharconfig = 0x0005 -}; - -enum viochar_rc { - viochar_rc_ebusy = 1 -}; - -#endif /* _ASM_POWERPC_ISERIES_VIO_H */ diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 8b086299ba2..bca3fc427b4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -34,11 +34,6 @@ #include #include #include -#include -#include -#include -#include -#include static struct bus_type vio_bus_type; @@ -1042,7 +1037,6 @@ static void vio_cmo_sysfs_init(void) vio_bus_type.bus_attrs = vio_cmo_bus_attrs; } #else /* CONFIG_PPC_SMLPAR */ -/* Dummy functions for iSeries platform */ int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } @@ -1060,9 +1054,6 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) struct iommu_table *tbl; unsigned long offset, size; - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return vio_build_iommu_table_iseries(dev); - dma_window = of_get_property(dev->dev.of_node, "ibm,my-dma-window", NULL); if (!dma_window) @@ -1195,8 +1186,7 @@ static void __devinit vio_dev_release(struct device *dev) { struct iommu_table *tbl = get_iommu_table_base(dev); - /* iSeries uses a common table for all vio devices */ - if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl) + if (tbl) iommu_free_table(tbl, dev->of_node ? dev->of_node->full_name : dev_name(dev)); of_node_put(dev->of_node); @@ -1244,12 +1234,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->name = of_node->name; viodev->type = of_node->type; viodev->unit_address = *unit_address; - if (firmware_has_feature(FW_FEATURE_ISERIES)) { - unit_address = of_get_property(of_node, - "linux,unit_address", NULL); - if (unit_address != NULL) - viodev->unit_address = *unit_address; - } viodev->dev.of_node = of_node_get(of_node); if (firmware_has_feature(FW_FEATURE_CMO)) -- cgit v1.2.3-70-g09d2 From 4f8cf36f48b4648a5231e9fc8e49faea377246f4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Feb 2012 13:44:58 +1100 Subject: powerpc: Remove legacy iSeries bits from assembly files This removes the various bits of assembly in the kernel entry, exception handling and SLB management code that were specific to running under the legacy iSeries hypervisor which is no longer supported. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 15 ----- arch/powerpc/kernel/entry_64.S | 42 +------------- arch/powerpc/kernel/exceptions-64s.S | 95 +++----------------------------- arch/powerpc/kernel/head_64.S | 44 +++------------ arch/powerpc/kernel/misc.S | 1 - arch/powerpc/kernel/vmlinux.lds.S | 5 -- arch/powerpc/mm/slb_low.S | 16 ------ 7 files changed, 15 insertions(+), 203 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 8057f4f6980..cc2bcf46474 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -272,26 +272,11 @@ label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) -#ifdef CONFIG_PPC_ISERIES -#define DISABLE_INTS \ - li r11,0; \ - stb r11,PACASOFTIRQEN(r13); \ -BEGIN_FW_FTR_SECTION; \ - stb r11,PACAHARDIRQEN(r13); \ -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES); \ - TRACE_DISABLE_INTS; \ -BEGIN_FW_FTR_SECTION; \ - mfmsr r10; \ - ori r10,r10,MSR_EE; \ - mtmsrd r10,1; \ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#else #define DISABLE_INTS \ li r11,0; \ stb r11,PACASOFTIRQEN(r13); \ stb r11,PACAHARDIRQEN(r13); \ TRACE_DISABLE_INTS -#endif /* CONFIG_PPC_ISERIES */ #define ENABLE_INTS \ ld r12,_MSR(r1); \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 866462cbe2d..0c3764ba8d4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -127,17 +127,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) stb r10,PACASOFTIRQEN(r13) stb r10,PACAHARDIRQEN(r13) std r10,SOFTE(r1) -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - /* Hack for handling interrupts when soft-enabling on iSeries */ - cmpdi cr1,r0,0x5555 /* syscall 0x5555 */ - andi. r10,r12,MSR_PR /* from kernel */ - crand 4*cr0+eq,4*cr1+eq,4*cr0+eq - bne 2f - b hardware_interrupt_entry -2: -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ /* Hard enable interrupts */ #ifdef CONFIG_PPC_BOOK3E @@ -591,15 +580,10 @@ _GLOBAL(ret_from_except_lite) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_USER_WORK_MASK bne do_work -#endif +#endif /* !CONFIG_PREEMPT */ restore: -BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) -FW_FTR_SECTION_ELSE - b .Liseries_check_pending_irqs -ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) -2: TRACE_AND_RESTORE_IRQ(r5); /* extract EE bit and use it to restore paca->hard_enabled */ @@ -669,30 +653,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) #endif /* CONFIG_PPC_BOOK3E */ -.Liseries_check_pending_irqs: -#ifdef CONFIG_PPC_ISERIES - ld r5,SOFTE(r1) - cmpdi 0,r5,0 - beq 2b - /* Check for pending interrupts (iSeries) */ - ld r3,PACALPPACAPTR(r13) - ld r3,LPPACAANYINT(r3) - cmpdi r3,0 - beq+ 2b /* skip do_IRQ if no interrupts */ - - li r3,0 - stb r3,PACASOFTIRQEN(r13) /* ensure we are soft-disabled */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_off - mfmsr r10 -#endif - ori r10,r10,MSR_EE - mtmsrd r10 /* hard-enable again */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite /* loop back and handle more */ -#endif - do_work: #ifdef CONFIG_PREEMPT andi. r0,r3,MSR_PR /* Returning to user mode? 
*/ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 15c5a4f6de0..fea8a69df4b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -19,7 +19,7 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x3000 - 0x5fff : interrupt support common interrupt prologs * 0x6000 - 0x6fff : Initial (CPU0) segment table * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - : Early init and support code @@ -458,6 +458,7 @@ machine_check_common: bl .machine_check_exception b .ret_from_except + STD_EXCEPTION_COMMON_LITE(0x500, hardware_interrupt, do_IRQ) STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) @@ -672,12 +673,6 @@ _GLOBAL(slb_miss_realmode) ld r10,PACA_EXSLB+EX_LR(r13) ld r3,PACA_EXSLB+EX_R3(r13) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - ld r11,PACALPPACAPTR(r13) - ld r11,LPPACASRR0(r11) /* get SRR0 value */ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ mtlr r10 @@ -690,12 +685,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ .machine pop -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ ld r9,PACA_EXSLB+EX_R9(r13) ld r10,PACA_EXSLB+EX_R10(r13) ld r11,PACA_EXSLB+EX_R11(r13) @@ -704,13 +693,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) rfid b . /* prevent speculative execution */ -2: -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - b unrecov_slb -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ - mfspr r11,SPRN_SRR0 +2: mfspr r11,SPRN_SRR0 ld r10,PACAKBASE(r13) LOAD_HANDLER(r10,unrecov_slb) mtspr SPRN_SRR0,r10 @@ -727,20 +710,6 @@ unrecov_slb: bl .unrecoverable_exception b 1b - .align 7 - .globl hardware_interrupt_common - .globl hardware_interrupt_entry -hardware_interrupt_common: - EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) - FINISH_NAP -hardware_interrupt_entry: - DISABLE_INTS -BEGIN_FTR_SECTION - bl .ppc64_runlatch_on -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite #ifdef CONFIG_PPC_970_NAP power4_fixup_nap: @@ -913,11 +882,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ - /* - * On iSeries, we soft-disable interrupts here, then - * hard-enable interrupts so that the hash_page code can spin on - * the hash_table_lock without problems on a shared processor. - */ + /* We run with interrupts both soft and hard disabled */ DISABLE_INTS /* @@ -956,25 +921,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) bl .hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ -BEGIN_FW_FTR_SECTION - /* - * If we had interrupts soft-enabled at the point where the - * DSI/ISI occurred, and an interrupt came in during hash_page, - * handle it now. - * We jump to ret_from_except_lite rather than fast_exception_return - * because ret_from_except_lite will check for and handle pending - * interrupts if necessary. 
- */ - beq 13f -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) - -BEGIN_FW_FTR_SECTION /* * Here we have interrupts hard-disabled, so it is sufficient * to restore paca->{soft,hard}_enable and get out. */ beq fast_exc_return_irq /* Return from exception on success */ -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) /* For a hash failure, we don't bother re-enabling interrupts */ ble- 12f @@ -1141,51 +1092,19 @@ _GLOBAL(do_stab_bolted) .= 0x7000 .globl fwnmi_data_area fwnmi_data_area: -#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ - - /* iSeries does not use the FWNMI stuff, so it is safe to put - * this here, even if we later allow kernels that will boot on - * both pSeries and iSeries */ -#ifdef CONFIG_PPC_ISERIES - . = LPARMAP_PHYS - .globl xLparMap -xLparMap: - .quad HvEsidsToMap /* xNumberEsids */ - .quad HvRangesToMap /* xNumberRanges */ - .quad STAB0_PAGE /* xSegmentTableOffs */ - .zero 40 /* xRsvd */ - /* xEsids (HvEsidsToMap entries of 2 quads) */ - .quad PAGE_OFFSET_ESID /* xKernelEsid */ - .quad PAGE_OFFSET_VSID /* xKernelVsid */ - .quad VMALLOC_START_ESID /* xKernelEsid */ - .quad VMALLOC_START_VSID /* xKernelVsid */ - /* xRanges (HvRangesToMap entries of 3 quads) */ - .quad HvPagesToMap /* xPages */ - .quad 0 /* xOffset */ - .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */ - -#endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* pseries and powernv need to keep the whole page from * 0x7000 to 0x8000 free for use by the firmware */ . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is given to the hv - * as a page number (see xLparMap above), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_OFFSET /* 0x8000 */ +/* Space for CPU0's segment table */ + .balign 4096 .globl initial_stab initial_stab: .space 4096 + #ifdef CONFIG_PPC_POWERNV _GLOBAL(opal_mc_secondary_handler) HMT_MEDIUM diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 06c7251c1bf..40759fbfb17 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -57,10 +56,6 @@ * entry in r9 for debugging purposes * 2. Secondary processors enter at 0x60 with PIR in gpr3 * - * For iSeries: - * 1. The MMU is on (as it always is for iSeries) - * 2. The kernel is entered at system_reset_iSeries - * * For Book3E processors: * 1. The MMU is on running in AS0 in a state defined in ePAPR * 2. The kernel is entered at __start @@ -93,15 +88,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .llong 0x0 -#ifdef CONFIG_PPC_ISERIES - /* - * At offset 0x20, there is a pointer to iSeries LPAR data. - * This is required by the hypervisor - */ - . = 0x20 - .llong hvReleaseData-KERNELBASE -#endif /* CONFIG_PPC_ISERIES */ - #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This @@ -582,7 +568,7 @@ _GLOBAL(pmac_secondary_start) * 1. Processor number * 2. Segment table pointer (virtual address) * On entry the following are set: - * r1 = stack pointer. 
vaddr for iSeries, raddr (temp stack) for pSeries + * r1 = stack pointer (real addr of temp stack) * r24 = cpu# (in Linux terms) * r13 = paca virtual address * SPRG_PACA = paca virtual address @@ -595,7 +581,7 @@ __secondary_start: /* Set thread priority to MEDIUM */ HMT_MEDIUM - /* Initialize the kernel stack. Just a repeat for iSeries. */ + /* Initialize the kernel stack */ LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ ldx r14,r3,r28 @@ -615,20 +601,13 @@ __secondary_start: li r7,0 mtlr r7 + /* Mark interrupts both hard and soft disabled */ + stb r7,PACAHARDIRQEN(r13) + stb r7,PACASOFTIRQEN(r13) + /* enable MMU and jump to start_secondary */ LOAD_REG_ADDR(r3, .start_secondary_prolog) LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - ori r4,r4,MSR_EE - li r8,1 - stb r8,PACAHARDIRQEN(r13) -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif -BEGIN_FW_FTR_SECTION - stb r7,PACAHARDIRQEN(r13) -END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) - stb r7,PACASOFTIRQEN(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 @@ -774,17 +753,8 @@ _INIT_GLOBAL(start_here_common) bl .setup_system /* Load up the kernel context */ -5: - li r5,0 +5: li r5,0 stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - mfmsr r5 - ori r5,r5,MSR_EE /* Hard Enabled on iSeries*/ - mtmsrd r5 - li r5,1 -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ bl .start_kernel diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index b69463ec201..ba16874fe29 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -5,7 +5,6 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * - * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) * * setjmp/longjmp code by Paul Mackerras. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 710a54005df..65d1c08cf09 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -109,11 +109,6 @@ SECTIONS __ptov_table_begin = .; *(.ptov_fixup); __ptov_table_end = .; -#ifdef CONFIG_PPC_ISERIES - __dt_strings_start = .; - *(.dt_strings); - __dt_strings_end = .; -#endif } .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index ef653dc95b6..b9ee79ce220 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -217,21 +217,6 @@ slb_finish_load: * free slot first but that took too long. Unfortunately we * dont have any LRU information to help us choose a slot. */ -#ifdef CONFIG_PPC_ISERIES -BEGIN_FW_FTR_SECTION - /* - * On iSeries, the "bolted" stack segment can be cast out on - * shared processor switch so we need to check for a miss on - * it and restore it to the right slot. - */ - ld r9,PACAKSAVE(r13) - clrrdi r9,r9,28 - clrrdi r3,r3,28 - li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ - cmpld r9,r3 - beq 3f -END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif /* CONFIG_PPC_ISERIES */ 7: ld r10,PACASTABRR(r13) addi r10,r10,1 @@ -282,7 +267,6 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. - * We assume legacy iSeries will never have 1T segments. 
* * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 */ -- cgit v1.2.3-70-g09d2 From 7450f6f03e9d6dc95d2014c4cceac8adf98560e8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 1 Mar 2012 10:52:01 +1100 Subject: powerpc: Use the same interrupt prolog for perfmon as other interrupts The perfmon interrupt is the sole user of a special variant of the interrupt prolog which differs from the one used by external and timer interrupts in that it saves the non-volatile GPRs and doesn't turn the runlatch on. The former is unnecessary and the later is arguably incorrect, so let's clean that up by using the same prolog. While at it we rename that prolog to use the _ASYNC prefix. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 17 +++-------------- arch/powerpc/kernel/exceptions-64s.S | 6 +++--- 2 files changed, 6 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index cc2bcf46474..041b2227967 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -297,21 +297,10 @@ label##_common: \ /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur - * in the idle task and therefore need the special idle handling. + * in the idle task and therefore need the special idle handling + * (finish nap and runlatch) */ -#define STD_EXCEPTION_COMMON_IDLE(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - FINISH_NAP; \ - DISABLE_INTS; \ - bl .save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except - -#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ +#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ .align 7; \ .globl label##_common; \ label##_common: \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fea8a69df4b..2240d4ecec0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -458,15 +458,15 @@ machine_check_common: bl .machine_check_exception b .ret_from_except - STD_EXCEPTION_COMMON_LITE(0x500, hardware_interrupt, do_IRQ) - STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) + STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) - STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) -- cgit v1.2.3-70-g09d2 From fe1952fc0afb9a2e4c79f103c08aef5d13db1873 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 1 Mar 2012 12:45:27 +1100 Subject: powerpc: Rework runlatch code This moves the inlines into system.h and changes the runlatch code to use the thread local flags (non-atomic) rather than the TIF flags (atomic) to keep track of the 
latch state. The code to turn it back on in an asynchronous interrupt is now simplified and partially inlined. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 50 +++++++++++++++++--------------- arch/powerpc/include/asm/reg.h | 22 ++------------ arch/powerpc/include/asm/system.h | 38 ++++++++++++++++++++++++ arch/powerpc/include/asm/thread_info.h | 9 +++++- arch/powerpc/kernel/exceptions-64s.S | 3 ++ arch/powerpc/kernel/process.c | 24 +++++++-------- 6 files changed, 89 insertions(+), 57 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 041b2227967..bdc9eebd1d4 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -284,35 +284,39 @@ label##_hv: \ rlwimi r11,r12,0,MSR_EE; \ mtmsrd r11,1 -#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - DISABLE_INTS; \ - bl .save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except +#define ADD_NVGPRS \ + bl .save_nvgprs + +#define RUNLATCH_ON \ +BEGIN_FTR_SECTION \ + clrrdi r3,r1,THREAD_SHIFT; \ + ld r4,TI_LOCAL_FLAGS(r3); \ + andi. r0,r4,_TLF_RUNLATCH; \ + beql ppc64_runlatch_on_trampoline; \ +END_FTR_SECTION_IFSET(CPU_FTR_CTRL) + +#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + additions; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret + +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ + ADD_NVGPRS;DISABLE_INTS) /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur * in the idle task and therefore need the special idle handling * (finish nap and runlatch) */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - FINISH_NAP; \ - DISABLE_INTS; \ -BEGIN_FTR_SECTION \ - bl .ppc64_runlatch_on; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except_lite +#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ + EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ + FINISH_NAP;RUNLATCH_ON;DISABLE_INTS) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fdc2c0b7fa..b1a215eabef 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1079,30 +1079,12 @@ #define proc_trap() asm volatile("trap") -#ifdef CONFIG_PPC64 - -extern void ppc64_runlatch_on(void); -extern void __ppc64_runlatch_off(void); - -#define ppc64_runlatch_off() \ - do { \ - if (cpu_has_feature(CPU_FTR_CTRL) && \ - test_thread_flag(TIF_RUNLATCH)) \ - __ppc64_runlatch_off(); \ - } while (0) +#define __get_SP() ({unsigned long sp; \ + asm volatile("mr %0,1": "=r" (sp)); sp;}) extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); -#else -#define ppc64_runlatch_on() -#define ppc64_runlatch_off() - -#endif /* CONFIG_PPC64 */ - -#define __get_SP() ({unsigned long sp; \ - asm volatile("mr %0,1": "=r" (sp)); sp;}) - struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); diff --git 
a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index c377457d1b8..a02883d5af4 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -550,5 +550,43 @@ extern void reloc_got2(unsigned long); extern struct dentry *powerpc_debugfs_root; +#ifdef CONFIG_PPC64 + +extern void __ppc64_runlatch_on(void); +extern void __ppc64_runlatch_off(void); + +/* + * We manually hard enable-disable, this is called + * in the idle loop and we don't want to mess up + * with soft-disable/enable & interrupt replay. + */ +#define ppc64_runlatch_off() \ + do { \ + if (cpu_has_feature(CPU_FTR_CTRL) && \ + test_thread_local_flags(_TLF_RUNLATCH)) { \ + unsigned long msr = mfmsr(); \ + __hard_irq_disable(); \ + __ppc64_runlatch_off(); \ + if (msr & MSR_EE) \ + __hard_irq_enable(); \ + } \ + } while (0) + +#define ppc64_runlatch_on() \ + do { \ + if (cpu_has_feature(CPU_FTR_CTRL) && \ + !test_thread_local_flags(_TLF_RUNLATCH)) { \ + unsigned long msr = mfmsr(); \ + __hard_irq_disable(); \ + __ppc64_runlatch_on(); \ + if (msr & MSR_EE) \ + __hard_irq_enable(); \ + } \ + } while (0) +#else +#define ppc64_runlatch_on() +#define ppc64_runlatch_off() +#endif /* CONFIG_PPC64 */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 96471494096..4a741c7efd0 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -110,7 +110,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ -#define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<flags); } +static inline bool test_thread_local_flags(unsigned int flags) +{ + struct thread_info *ti = current_thread_info(); + return (ti->local_flags & flags) != 0; +} + #ifdef CONFIG_PPC64 #define is_32bit_task() (test_thread_flag(TIF_32BIT)) #else diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2240d4ecec0..3af80e82830 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -483,6 +483,9 @@ machine_check_common: system_call_entry: b system_call_common +ppc64_runlatch_on_trampoline: + b .__ppc64_runlatch_on + /* * Here we have detected that the kernel stack pointer is bad. 
* R9 contains the saved CR, r13 points to the paca, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d817ab01848..bf80a1d5f8f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1220,34 +1220,32 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); #ifdef CONFIG_PPC64 -void ppc64_runlatch_on(void) +/* Called with hard IRQs off */ +void __ppc64_runlatch_on(void) { + struct thread_info *ti = current_thread_info(); unsigned long ctrl; - if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { - HMT_medium(); - - ctrl = mfspr(SPRN_CTRLF); - ctrl |= CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); - set_thread_flag(TIF_RUNLATCH); - } + ti->local_flags |= TLF_RUNLATCH; } +/* Called with hard IRQs off */ void __ppc64_runlatch_off(void) { + struct thread_info *ti = current_thread_info(); unsigned long ctrl; - HMT_medium(); - - clear_thread_flag(TIF_RUNLATCH); + ti->local_flags &= ~TLF_RUNLATCH; ctrl = mfspr(SPRN_CTRLF); ctrl &= ~CTRL_RUNLATCH; mtspr(SPRN_CTRLT, ctrl); } -#endif +#endif /* CONFIG_PPC64 */ #if THREAD_SHIFT < PAGE_SHIFT -- cgit v1.2.3-70-g09d2 From 1b70117924a4f254840ed70fbe3020d4519a1a9a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 1 Mar 2012 15:42:56 +1100 Subject: powerpc: Improve behaviour of irq tracing on 64-bit exception entry Some exceptions would unconditionally disable interrupts on entry, which is fine, but calling lockdep every time not only adds more overhead than strictly needed, but also means we get quite a few "redudant" disable logged, which makes it hard to spot the really bad ones. So instead, split the macro used by the exception code into a normal one and a separate one used when CONFIG_TRACE_IRQFLAGS is enabled, and make the later skip th tracing if interrupts were already disabled. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index bdc9eebd1d4..7f4718c4f04 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -272,15 +272,34 @@ label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) +/* + * Our exception common code can be passed various "additions" + * to specify the behaviour of interrupts, whether to kick the + * runlatch, etc... + */ + +/* Exception addition: Hard disable interrupts */ +#ifdef CONFIG_TRACE_IRQFLAGS #define DISABLE_INTS \ + lbz r10,PACASOFTIRQEN(r13); \ li r11,0; \ - stb r11,PACASOFTIRQEN(r13); \ + cmpwi cr0,r10,0; \ stb r11,PACAHARDIRQEN(r13); \ - TRACE_DISABLE_INTS + beq 44f; \ + stb r11,PACASOFTIRQEN(r13); \ + TRACE_DISABLE_INTS; \ +44: +#else +#define DISABLE_INTS \ + li r11,0; \ + stb r11,PACASOFTIRQEN(r13); \ + stb r11,PACAHARDIRQEN(r13) +#endif /* CONFIG_TRACE_IRQFLAGS */ +/* Exception addition: Keep interrupt state */ #define ENABLE_INTS \ - ld r12,_MSR(r1); \ mfmsr r11; \ + ld r12,_MSR(r1); \ rlwimi r11,r12,0,MSR_EE; \ mtmsrd r11,1 -- cgit v1.2.3-70-g09d2 From a546498f3bf9aac311c66f965186373aee2ca0b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 7 Mar 2012 16:48:45 +1100 Subject: powerpc: Call do_page_fault() with interrupts off We currently turn interrupts back to their previous state before calling do_page_fault(). 
This can be annoying when debugging as a bad fault will potentially have lost some processor state before getting into the debugger. We also end up calling some generic code with interrupts enabled such as notify_page_fault() with interrupts enabled, which could be unexpected. This changes our code to behave more like other architectures, and make the assembly entry code call into do_page_faults() with interrupts disabled. They are conditionally re-enabled from within do_page_fault() in the same spot x86 does it. While there, add the might_sleep() test in the case of a successful trylock of the mmap semaphore, again like x86. Also fix a bug in the existing assembly where r12 (_MSR) could get clobbered by C calls (the DTL accounting in the exception common macro and DISABLE_INTS) in some cases. Signed-off-by: Benjamin Herrenschmidt --- v2. Add the r12 clobber fix --- arch/powerpc/include/asm/hw_irq.h | 10 ++++++ arch/powerpc/kernel/exceptions-64e.S | 5 ++- arch/powerpc/kernel/exceptions-64s.S | 59 +++++++++++------------------------- arch/powerpc/kernel/head_32.S | 4 +-- arch/powerpc/kernel/head_40x.S | 4 +-- arch/powerpc/kernel/head_8xx.S | 4 +-- arch/powerpc/kernel/head_booke.h | 4 +-- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/mm/fault.c | 11 +++++++ 9 files changed, 49 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index bb712c9488b..531ba00fcba 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -79,6 +79,11 @@ static inline bool arch_irqs_disabled(void) get_paca()->hard_enabled = 0; \ } while(0) +static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +{ + return !regs->softe; +} + #else /* CONFIG_PPC64 */ #define SET_MSR_EE(x) mtmsr(x) @@ -139,6 +144,11 @@ static inline bool arch_irqs_disabled(void) #define hard_irq_disable() arch_local_irq_disable() +static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +{ + return !(regs->msr & MSR_EE); +} + #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 429983c06f9..573613d747a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -313,7 +313,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE_ALL) b storage_fault_common /* Instruction Storage Interrupt */ @@ -321,7 +321,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP) + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE_ALL) b storage_fault_common /* External Input Interrupt */ @@ -591,7 +591,6 @@ storage_fault_common: mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - INTS_RESTORE_HARD bl .do_page_fault cmpdi r3,0 bne- 1f diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3af80e82830..d8ff6d37fc4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -559,6 +559,8 @@ data_access_common: mfspr r10,SPRN_DSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) li r5,0x300 @@ -573,6 +575,7 @@ 
h_data_storage_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD bl .unknown_exception b .ret_from_except @@ -581,6 +584,8 @@ h_data_storage_common: .globl instruction_access_common instruction_access_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + DISABLE_INTS + ld r12,_MSR(r1) ld r3,_NIP(r1) andis. r4,r12,0x5820 li r5,0x400 @@ -884,24 +889,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ - - /* We run with interrupts both soft and hard disabled */ - DISABLE_INTS - - /* - * Currently, trace_hardirqs_off() will be called by DISABLE_INTS - * and will clobber volatile registers when irq tracing is enabled - * so we need to reload them. It may be possible to be smarter here - * and move the irq tracing elsewhere but let's keep it simple for - * now - */ -#ifdef CONFIG_TRACE_IRQFLAGS - ld r3,_DAR(r1) - ld r4,_DSISR(r1) - ld r5,_TRAP(r1) - ld r12,_MSR(r1) - clrrdi r5,r5,4 -#endif /* CONFIG_TRACE_IRQFLAGS */ /* * We need to set the _PAGE_USER bit if MSR_PR is set or if we are * accessing a userspace segment (even from the kernel). We assume @@ -931,36 +918,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) beq fast_exc_return_irq /* Return from exception on success */ /* For a hash failure, we don't bother re-enabling interrupts */ - ble- 12f - - /* - * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .arch_local_irq_restore - * handles any interrupts pending at this point. - */ - ld r3,SOFTE(r1) - TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) - bl .arch_local_irq_restore - b 11f - -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - bl .save_nvgprs - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_dabr - b .ret_from_except_lite + ble- 13f /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: - ENABLE_INTS 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl .do_page_fault cmpdi r3,0 - beq+ 13f + beq+ 12f bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD @@ -968,12 +935,20 @@ handle_page_fault: bl .bad_page_fault b .ret_from_except -13: b .ret_from_except_lite +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + bl .save_nvgprs + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr +12: b .ret_from_except_lite + /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -12: bl .save_nvgprs +13: bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0654dba2c1f..dc0488b6f6e 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -395,7 +395,7 @@ DataAccess: bl hash_page 1: lwz r5,_DSISR(r11) /* get DSISR value */ mfspr r4,SPRN_DAR - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. 
*/ @@ -410,7 +410,7 @@ InstructionAccess: bl hash_page 1: mr r4,r12 mr r5,r9 - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 872a6af83ba..4989661b710 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -394,7 +394,7 @@ label: NORMAL_EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -747,7 +747,7 @@ DataAccess: mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b68cb173ba2..b2a5860accf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -220,7 +220,7 @@ DataAccess: mfspr r4,SPRN_DAR li r10,0x00f0 mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_EE_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. * This is "never generated" by the MPC8xx. We jump to it for other @@ -231,7 +231,7 @@ InstructionAccess: EXCEPTION_PROLOG mr r4,r12 mr r5,r9 - EXC_XFER_EE_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fc921bf62e1..0e4175388f4 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -359,7 +359,7 @@ label: mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ - EXC_XFER_EE_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, handle_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -368,7 +368,7 @@ label: stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ li r5,0; /* Pass zero as arg3 */ \ - EXC_XFER_EE_LITE(0x0400, handle_page_fault) + EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d5d78c4ceef..28e62598d0e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -319,7 +319,7 @@ interrupt_base: mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_EE_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x0300, CacheLockingException) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2f0d1b032a8..7e890065cf3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -179,6 +179,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } #endif + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + if (in_atomic() || mm == NULL) { if (!user_mode(regs)) return SIGSEGV; @@ -213,6 +217,13 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, goto bad_area_nosemaphore; down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case we'll have missed the might_sleep() from + * down_read(): + */ + might_sleep(); } vma = find_vma(mm, address); -- cgit v1.2.3-70-g09d2 From 990118c84b3e90b2b5354b6e2acd961044d7fa12 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 2 Mar 2012 11:01:31 +1100 Subject: powerpc: Fix register clobbering when accumulating stolen time When running under a hypervisor that supports stolen time accounting, we may call C code from the macro EXCEPTION_PROLOG_COMMON in the exception entry path, which clobbers CR0. However, the FPU and vector traps rely on CR0 indicating whether we are coming from userspace or kernel to decide what to do. So we need to restore that value after the C call Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc_asm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 368f72f7980..50f73aa2ba2 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -60,6 +60,8 @@ BEGIN_FW_FTR_SECTION; \ cmpd cr1,r11,r10; \ beq+ cr1,33f; \ bl .accumulate_stolen_time; \ + ld r12,_MSR(r1); \ + andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ 33: \ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -- cgit v1.2.3-70-g09d2 From d9ada91ae2969ae6b6dc3574fd08a6ebda5df766 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 2 Mar 2012 11:33:52 +1100 Subject: powerpc: Replace mfmsr instructions with load from PACA kernel_msr field On 64-bit, the mfmsr instruction can be quite slow, slower than loading a field from the cache-hot PACA, which happens to already contain the value we want in most cases. 
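To make the idea concrete, the essence of the change is visible in the hw_irq.h hunk below; shown side by side here as a condensed excerpt (not standalone, buildable code), the hard-disable path goes from reading the MSR to reusing the precomputed kernel MSR kept in the PACA:

    /* Before: derive the value by reading the MSR register each time (slow on 64-bit). */
    #define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)

    /* After: reuse the cache-hot, precomputed kernel MSR from the PACA. */
    #define __hard_irq_disable()	__mtmsrd(local_paca->kernel_msr, 1)

The assembly paths get the same treatment: the mfmsr/rldicl/rotldi sequences are replaced by a single "ld rN,PACAKMSR(r13)" load, as the hunks below show.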
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/hw_irq.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 14 +++++--------- arch/powerpc/kernel/exceptions-64s.S | 5 ++--- 4 files changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 7f4718c4f04..70354af0740 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -298,7 +298,7 @@ label##_hv: \ /* Exception addition: Keep interrupt state */ #define ENABLE_INTS \ - mfmsr r11; \ + ld r11,PACAKMSR(r13); \ ld r12,_MSR(r1); \ rlwimi r11,r12,0,MSR_EE; \ mtmsrd r11,1 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 531ba00fcba..6c6fa955baa 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -68,8 +68,8 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else -#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) -#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) +#define __hard_irq_enable() __mtmsrd(local_paca->kernel_msr | MSR_EE, 1) +#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif #define hard_irq_disable() \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index cc030b73174..c513beb78b3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -557,10 +557,8 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - mfmsr r10 /* Get current interrupt state */ - rldicl r9,r10,48,1 /* clear MSR_EE */ - rotldi r9,r9,16 - mtmsrd r9,1 /* Update machine state */ + ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PREEMPT @@ -625,8 +623,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ - mfmsr r4 - andc r4,r4,r0 /* r0 contains MSR_RI here */ + ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */ + andc r4,r4,r0 /* r0 contains MSR_RI here */ mtmsrd r4,1 /* @@ -686,9 +684,7 @@ do_work: #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 + ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ li r0,0 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0fb42ae2169..02448ea58ad 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -848,9 +848,8 @@ fast_exception_return: REST_GPR(0, r1) REST_8GPRS(2, r1) - mfmsr r10 - rldicl r10,r10,48,1 /* clear EE */ - rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */ + ld r10,PACAKMSR(r13) + clrrdi r10,r10,2 /* clear RI */ mtmsrd r10,1 mtspr SPRN_SRR1,r12 -- cgit v1.2.3-70-g09d2 From cb3bc9d0de1e247268622acd40e0d2f8120f299c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:51 +0000 Subject: powerpc/eeh: Cleanup comments in the EEH core The EEH has been implemented on pSeries platform. The original code looks a little bit nasty. The patch does cleanup on the current EEH implementation so that it looks more clean. 
* Duplicated comments have been removed from the corresponding header files. * Comments have been reorganized so that it looks more clean. * The leading comments of functions are adjusted for a little bit so that the result of "make pdfdocs" would be more unified. * Function definitions and calls have unified format as "xxx()". That means the format "xxx ()" has been replaced by "xxx()". * There're multiple functions implemented for resetting PE. The position of those functions have been move around so that they are adjacent to each other to reflect their relationship. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 26 +-- arch/powerpc/include/asm/ppc-pci.h | 71 +------ arch/powerpc/platforms/pseries/eeh.c | 400 +++++++++++++++++++++++------------ 3 files changed, 276 insertions(+), 221 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 66ea9b8b95c..232887721ff 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -1,6 +1,6 @@ /* - * eeh.h * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. + * Copyright 2001-2012 IBM Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,8 +40,10 @@ extern int eeh_subsystem_enabled; #define EEH_MODE_RECOVERING (1<<3) #define EEH_MODE_IRQ_DISABLED (1<<4) -/* Max number of EEH freezes allowed before we consider the device - * to be permanently disabled. */ +/* + * Max number of EEH freezes allowed before we consider the device + * to be permanently disabled. + */ #define EEH_MAX_ALLOWED_FREEZES 5 void __init eeh_init(void); @@ -49,26 +51,8 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev); void __init pci_addr_cache_build(void); - -/** - * eeh_add_device_early - * eeh_add_device_late - * - * Perform eeh initialization for devices added after boot. - * Call eeh_add_device_early before doing any i/o to the - * device (including config space i/o). Call eeh_add_device_late - * to finish the eeh setup for this device. - */ void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); - -/** - * eeh_remove_device_recursive - undo EEH for device & children. - * @dev: pci device to be removed - * - * As above, this removes the device; it also removes child - * pci devices as well. - */ void eeh_remove_bus_device(struct pci_dev *); /** diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 6d422979eba..221d82fd823 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -47,92 +47,27 @@ extern int rtas_setup_phb(struct pci_controller *phb); extern unsigned long pci_probe_only; -/* ---- EEH internal-use-only related routines ---- */ #ifdef CONFIG_EEH void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); void pci_addr_cache_build(void); struct pci_dev *pci_get_device_by_addr(unsigned long addr); - -/** - * eeh_slot_error_detail -- record and EEH error condition to the log - * @pdn: pci device node - * @severity: EEH_LOG_TEMP_FAILURE or EEH_LOG_PERM_FAILURE - * - * Obtains the EEH error details from the RTAS subsystem, - * and then logs these details with the RTAS error log system. 
- */ #define EEH_LOG_TEMP_FAILURE 1 #define EEH_LOG_PERM_FAILURE 2 void eeh_slot_error_detail (struct pci_dn *pdn, int severity); - -/** - * rtas_pci_enable - enable IO transfers for this slot - * @pdn: pci device node - * @function: either EEH_THAW_MMIO or EEH_THAW_DMA - * - * Enable I/O transfers to this slot - */ #define EEH_THAW_MMIO 2 #define EEH_THAW_DMA 3 int rtas_pci_enable(struct pci_dn *pdn, int function); - -/** - * rtas_set_slot_reset -- unfreeze a frozen slot - * @pdn: pci device node - * - * Clear the EEH-frozen condition on a slot. This routine - * does this by asserting the PCI #RST line for 1/8th of - * a second; this routine will sleep while the adapter is - * being reset. - * - * Returns a non-zero value if the reset failed. - */ int rtas_set_slot_reset (struct pci_dn *); int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); - -/** - * eeh_restore_bars - Restore device configuration info. - * @pdn: pci device node - * - * A reset of a PCI device will clear out its config space. - * This routines will restore the config space for this - * device, and is children, to values previously obtained - * from the firmware. - */ void eeh_restore_bars(struct pci_dn *); - -/** - * rtas_configure_bridge -- firmware initialization of pci bridge - * @pdn: pci device node - * - * Ask the firmware to configure all PCI bridges devices - * located behind the indicated node. Required after a - * pci device reset. Does essentially the same hing as - * eeh_restore_bars, but for brdges, and lets firmware - * do the work. - */ void rtas_configure_bridge(struct pci_dn *); - int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); - -/** - * eeh_mark_slot -- set mode flags for pertition endpoint - * @pdn: pci device node - * - * mark and clear slots: find "partition endpoint" PE and set or - * clear the flags for each subnode of the PE. - */ -void eeh_mark_slot (struct device_node *dn, int mode_flag); -void eeh_clear_slot (struct device_node *dn, int mode_flag); - -/** - * find_device_pe -- Find the associated "Partiationable Endpoint" PE - * @pdn: pci device node - */ -struct device_node * find_device_pe(struct device_node *dn); +void eeh_mark_slot(struct device_node *dn, int mode_flag); +void eeh_clear_slot(struct device_node *dn, int mode_flag); +struct device_node *find_device_pe(struct device_node *dn); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index c0b40af4ce4..5f6d37bdd4f 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -1,8 +1,8 @@ /* - * eeh.c * Copyright IBM Corporation 2001, 2005, 2006 * Copyright Dave Engebretsen & Todd Inglett 2001 * Copyright Linas Vepstas 2005, 2006 + * Copyright 2001-2012 IBM Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ */ #include -#include /* for init_mm */ +#include #include #include #include @@ -129,9 +129,16 @@ static unsigned long slot_resets; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) -/* --------------------------------------------------------------- */ -/* Below lies the EEH event infrastructure */ - +/** + * rtas_slot_error_detail - Retrieve error log through RTAS call + * @pdn: device node + * @severity: temporary or permanent error log + * @driver_log: driver log to be combined with the retrieved error log + * @loglen: length of driver log + * + * This routine should be called to retrieve error log through the dedicated + * RTAS call. + */ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, char *driver_log, size_t loglen) { @@ -163,7 +170,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, } /** - * gather_pci_data - copy assorted PCI config space registers to buff + * gather_pci_data - Copy assorted PCI config space registers to buff * @pdn: device to report data for * @buf: point to buffer in which to log * @len: amount of room in buffer @@ -258,6 +265,16 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) return n; } +/** + * eeh_slot_error_detail - Generate combined log including driver log and error log + * @pdn: device node + * @severity: temporary or permanent error log + * + * This routine should be called to generate the combined log, which + * is comprised of driver log and error log. The driver log is figured + * out from the config space of the corresponding PCI device, while + * the error log is fetched through platform dependent function call. + */ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) { size_t loglen = 0; @@ -275,6 +292,9 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) * read_slot_reset_state - Read the reset state of a device node's slot * @dn: device node to read * @rets: array to return results in + * + * Read the reset state of a device node's slot through platform dependent + * function call. */ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) { @@ -300,9 +320,9 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) } /** - * eeh_wait_for_slot_status - returns error status of slot - * @pdn pci device node - * @max_wait_msecs maximum number to millisecs to wait + * eeh_wait_for_slot_status - Returns error status of slot + * @pdn: pci device node + * @max_wait_msecs: maximum number to millisecs to wait * * Return negative value if a permanent error, else return * Partition Endpoint (PE) status value. 
@@ -332,16 +352,16 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) mwait = rets[2]; if (mwait <= 0) { - printk (KERN_WARNING - "EEH: Firmware returned bad wait value=%d\n", mwait); + printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n", + mwait); mwait = 1000; } else if (mwait > 300*1000) { - printk (KERN_WARNING - "EEH: Firmware is taking too long, time=%d\n", mwait); + printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n", + mwait); mwait = 300*1000; } max_wait_msecs -= mwait; - msleep (mwait); + msleep(mwait); } printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); @@ -349,8 +369,11 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) } /** - * eeh_token_to_phys - convert EEH address token to phys address - * @token i/o token, should be address in the form 0xA.... + * eeh_token_to_phys - Convert EEH address token to phys address + * @token: I/O token, should be address in the form 0xA.... + * + * This routine should be called to convert virtual I/O address + * to physical one. */ static inline unsigned long eeh_token_to_phys(unsigned long token) { @@ -365,8 +388,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) return pa | (token & (PAGE_SIZE-1)); } -/** - * Return the "partitionable endpoint" (pe) under which this device lies +/** + * find_device_pe - Retrieve the PE for the given device + * @dn: device node + * + * Return the PE under which this device lies */ struct device_node * find_device_pe(struct device_node *dn) { @@ -377,14 +403,18 @@ struct device_node * find_device_pe(struct device_node *dn) return dn; } -/** Mark all devices that are children of this device as failed. - * Mark the device driver too, so that it can see the failure - * immediately; this is critical, since some drivers poll - * status registers in interrupts ... If a driver is polling, - * and the slot is frozen, then the driver can deadlock in - * an interrupt context, which is bad. +/** + * __eeh_mark_slot - Mark all child devices as failed + * @parent: parent device + * @mode_flag: failure flag + * + * Mark all devices that are children of this device as failed. + * Mark the device driver too, so that it can see the failure + * immediately; this is critical, since some drivers poll + * status registers in interrupts ... If a driver is polling, + * and the slot is frozen, then the driver can deadlock in + * an interrupt context, which is bad. */ - static void __eeh_mark_slot(struct device_node *parent, int mode_flag) { struct device_node *dn; @@ -404,10 +434,18 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag) } } -void eeh_mark_slot (struct device_node *dn, int mode_flag) +/** + * eeh_mark_slot - Mark the indicated device and its children as failed + * @dn: parent device + * @mode_flag: failure flag + * + * Mark the indicated device and its child devices as failed. + * The device drivers are marked as failed as well. + */ +void eeh_mark_slot(struct device_node *dn, int mode_flag) { struct pci_dev *dev; - dn = find_device_pe (dn); + dn = find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -423,6 +461,13 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag) __eeh_mark_slot(dn, mode_flag); } +/** + * __eeh_clear_slot - Clear failure flag for the child devices + * @parent: parent device + * @mode_flag: flag to be cleared + * + * Clear failure flag for the child devices. 
+ */ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) { struct device_node *dn; @@ -436,12 +481,19 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) } } -void eeh_clear_slot (struct device_node *dn, int mode_flag) +/** + * eeh_clear_slot - Clear failure flag for the indicated device and its children + * @dn: parent device + * @mode_flag: flag to be cleared + * + * Clear failure flag for the indicated device and its children. + */ +void eeh_clear_slot(struct device_node *dn, int mode_flag) { unsigned long flags; raw_spin_lock_irqsave(&confirm_error_lock, flags); - dn = find_device_pe (dn); + dn = find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -453,43 +505,10 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } -void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (PCI_DN(dn)) { - - struct pci_dev *dev = PCI_DN(dn)->pcidev; - - if (dev && dev->driver) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); - } - } -} - -void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) -{ - struct pci_dev *dev; - dn = find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) - dn = dn->parent; - - dev = PCI_DN(dn)->pcidev; - if (dev) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); -} - /** - * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze - * @dn device node - * @dev pci device, if known + * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze + * @dn: device node + * @dev: pci device, if known * * Check for an EEH failure for the given device node. Call this * routine if the result of a read was all 0xff's and you want to @@ -548,11 +567,11 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) pdn->eeh_check_count ++; if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) { location = of_get_property(dn, "ibm,loc-code", NULL); - printk (KERN_ERR "EEH: %d reads ignored for recovering device at " + printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", pdn->eeh_check_count, location, eeh_driver_name(dev), eeh_pci_name(dev)); - printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n", + printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); dump_stack(); } @@ -579,7 +598,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) } /* Note that config-io to empty slots may fail; - * they are empty when they don't have children. */ + * they are empty when they don't have children. + */ if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) { false_positives++; pdn->eeh_false_positives ++; @@ -609,15 +629,17 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) /* Avoid repeated reports of this failure, including problems * with other functions on this device, and functions under - * bridges. */ - eeh_mark_slot (dn, EEH_MODE_ISOLATED); + * bridges. + */ + eeh_mark_slot(dn, EEH_MODE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event (dn, dev); + eeh_send_failure_event(dn, dev); /* Most EEH events are due to device driver bugs. 
Having * a stack trace will help the device-driver authors figure - * out what happened. So print that out. */ + * out what happened. So print that out. + */ dump_stack(); return 1; @@ -629,9 +651,9 @@ dn_unlock: EXPORT_SYMBOL_GPL(eeh_dn_check_failure); /** - * eeh_check_failure - check if all 1's data is due to EEH slot freeze - * @token i/o token, should be address in the form 0xA.... - * @val value, should be all 1's (XXX why do we need this arg??) + * eeh_check_failure - Check if all 1's data is due to EEH slot freeze + * @token: I/O token, should be address in the form 0xA.... + * @val: value, should be all 1's (XXX why do we need this arg??) * * Check for an EEH failure at the given token address. Call this * routine if the result of a read was all 0xff's and you want to @@ -655,7 +677,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon } dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure (dn, dev); + eeh_dn_check_failure(dn, dev); pci_dev_put(dev); return val; @@ -663,14 +685,15 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon EXPORT_SYMBOL(eeh_check_failure); -/* ------------------------------------------------------------- */ -/* The code below deals with error recovery */ /** - * rtas_pci_enable - enable MMIO or DMA transfers for this slot + * rtas_pci_enable - Enable MMIO or DMA transfers for this slot * @pdn pci device node + * + * This routine should be called to reenable frozen MMIO or DMA + * so that it would work correctly again. It's useful while doing + * recovery or log collection on the indicated device. */ - int rtas_pci_enable(struct pci_dn *pdn, int function) { @@ -692,7 +715,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function) printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", function, rc, pdn->node->full_name); - rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); if ((rc == 4) && (function == EEH_THAW_MMIO)) return 0; @@ -700,27 +723,25 @@ rtas_pci_enable(struct pci_dn *pdn, int function) } /** - * rtas_pci_slot_reset - raises/lowers the pci #RST line - * @pdn pci device node + * rtas_pci_slot_reset - Raises/Lowers the pci #RST line + * @pdn: pci device node * @state: 1/0 to raise/lower the #RST * * Clear the EEH-frozen condition on a slot. This routine * asserts the PCI #RST line if the 'state' argument is '1', * and drops the #RST line if 'state is '0'. This routine is * safe to call in an interrupt context. - * */ - static void rtas_pci_slot_reset(struct pci_dn *pdn, int state) { int config_addr; int rc; - BUG_ON (pdn==NULL); + BUG_ON(pdn==NULL); if (!pdn->phb) { - printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", + printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", pdn->node->full_name); return; } @@ -752,12 +773,12 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) /** * pcibios_set_pcie_slot_reset - Set PCI-E reset state - * @dev: pci device struct - * @state: reset state to enter + * @dev: pci device struct + * @state: reset state to enter * * Return value: * 0 if success - **/ + */ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct device_node *dn = pci_device_to_OF_node(dev); @@ -781,10 +802,62 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat } /** - * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second - * @pdn: pci device node to be reset. 
+ * __eeh_set_pe_freset - Check the required reset for child devices + * @parent: parent device + * @freset: return value + * + * Each device might have its preferred reset type: fundamental or + * hot reset. The routine is used to collect the information from + * the child devices so that they could be reset accordingly. */ +void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) +{ + struct device_node *dn; + + for_each_child_of_node(parent, dn) { + if (PCI_DN(dn)) { + struct pci_dev *dev = PCI_DN(dn)->pcidev; + + if (dev && dev->driver) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); + } + } +} + +/** + * eeh_set_pe_freset - Check the required reset for the indicated device and its children + * @dn: parent device + * @freset: return value + * + * Each device might have its preferred reset type: fundamental or + * hot reset. The routine is used to collected the information for + * the indicated device and its children so that the bunch of the + * devices could be reset properly. + */ +void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +{ + struct pci_dev *dev; + dn = find_device_pe(dn); + + /* Back up one, since config addrs might be shared */ + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = dn->parent; + dev = PCI_DN(dn)->pcidev; + if (dev) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); +} + +/** + * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second + * @pdn: pci device node to be reset. + * + * Assert the PCI #RST line for 1/4 second. + */ static void __rtas_set_slot_reset(struct pci_dn *pdn) { unsigned int freset = 0; @@ -803,25 +876,35 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) rtas_pci_slot_reset(pdn, 1); /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. */ - + * a 100 milliseconds. We wait a bit longer 'just in case'. + */ #define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep (PCI_BUS_RST_HOLD_TIME_MSEC); + msleep(PCI_BUS_RST_HOLD_TIME_MSEC); /* We might get hit with another EEH freeze as soon as the * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. */ - eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED); + * these, and clear the flag now. + */ + eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); - rtas_pci_slot_reset (pdn, 0); + rtas_pci_slot_reset(pdn, 0); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. */ + * up traffic. + */ #define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep (PCI_BUS_SETTLE_TIME_MSEC); + msleep(PCI_BUS_SETTLE_TIME_MSEC); } +/** + * rtas_set_slot_reset - Reset the indicated PE + * @pdn: PCI device node + * + * This routine should be called to reset indicated device, including + * PE. A PE might include multiple PCI devices and sometimes PCI bridges + * might be involved as well. + */ int rtas_set_slot_reset(struct pci_dn *pdn) { int i, rc; @@ -846,7 +929,6 @@ int rtas_set_slot_reset(struct pci_dn *pdn) return -1; } -/* ------------------------------------------------------- */ /** Save and restore of PCI BARs * * Although firmware will set up BARs during boot, it doesn't @@ -863,7 +945,7 @@ int rtas_set_slot_reset(struct pci_dn *pdn) * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. 
*/ -static inline void __restore_bars (struct pci_dn *pdn) +static inline void __restore_bars(struct pci_dn *pdn) { int i; u32 cmd; @@ -879,17 +961,18 @@ static inline void __restore_bars (struct pci_dn *pdn) #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) - rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1, + rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - rtas_write_config (pdn, PCI_LATENCY_TIMER, 1, + rtas_write_config(pdn, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); /* Restore PERR & SERR bits, some devices require it, - don't touch the other command bits */ + * don't touch the other command bits + */ rtas_read_config(pdn, PCI_COMMAND, 4, &cmd); if (pdn->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; @@ -903,7 +986,8 @@ static inline void __restore_bars (struct pci_dn *pdn) } /** - * eeh_restore_bars - restore the PCI config space info + * eeh_restore_bars - Restore the PCI config space info + * @pdn: PCI device node * * This routine performs a recursive walk to the children * of this device as well. @@ -915,14 +999,15 @@ void eeh_restore_bars(struct pci_dn *pdn) return; if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) - __restore_bars (pdn); + __restore_bars(pdn); for_each_child_of_node(pdn->node, dn) - eeh_restore_bars (PCI_DN(dn)); + eeh_restore_bars(PCI_DN(dn)); } /** - * eeh_save_bars - save device bars + * eeh_save_bars - Save device bars + * @pdn: PCI device node * * Save the values of the device bars. Unlike the restore * routine, this routine is *not* recursive. This is because @@ -940,6 +1025,14 @@ static void eeh_save_bars(struct pci_dn *pdn) rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); } +/** + * rtas_configure_bridge - Configure PCI bridges for the indicated PE + * @pdn: PCI device node + * + * PCI bridges might be included in PE. In order to make the PE work + * again. The included PCI bridges should be recovered after the PE + * encounters frozen state. + */ void rtas_configure_bridge(struct pci_dn *pdn) { @@ -963,17 +1056,11 @@ rtas_configure_bridge(struct pci_dn *pdn) BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); if (rc) { - printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", + printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", rc, pdn->node->full_name); } } -/* ------------------------------------------------------------- */ -/* The code below deals with enabling EEH for devices during the - * early boot sequence. EEH must be enabled before any PCI probing - * can be done. - */ - #define EEH_ENABLE 1 struct eeh_early_enable_info { @@ -981,7 +1068,18 @@ struct eeh_early_enable_info { unsigned int buid_lo; }; -static int get_pe_addr (int config_addr, +/** + * get_pe_addr - Retrieve PE address with given BDF address + * @config_addr: BDF address + * @info: BUID of the associated PHB + * + * There're 2 kinds of addresses existing in EEH core components: + * BDF address and PE address. Besides, there has dedicated platform + * dependent function call to retrieve the PE address according to + * the given BDF address. Further more, we prefer PE address on BDF + * address in EEH core components. 
+ */ +static int get_pe_addr(int config_addr, struct eeh_early_enable_info *info) { unsigned int rets[3]; @@ -990,12 +1088,12 @@ static int get_pe_addr (int config_addr, /* Use latest config-addr token on power6 */ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* Make sure we have a PE in hand */ - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 1); if (ret || (rets[0]==0)) return 0; - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 0); if (ret) return 0; @@ -1004,7 +1102,7 @@ static int get_pe_addr (int config_addr, /* Use older config-addr token on power5 */ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, config_addr, info->buid_hi, info->buid_lo, 0); if (ret) return 0; @@ -1013,7 +1111,15 @@ static int get_pe_addr (int config_addr, return 0; } -/* Enable eeh for the given device node. */ +/** + * early_enable_eeh - Early enable EEH on the indicated device + * @dn: device node + * @data: BUID + * + * Enable EEH functionality on the specified PCI device. The function + * is expected to be called before real PCI probing is done. However, + * the PHBs have been initialized at this point. + */ static void *early_enable_eeh(struct device_node *dn, void *data) { unsigned int rets[3]; @@ -1047,7 +1153,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data) pdn->class_code = *class_code; /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. */ + * and the only way to find out is to check each and every one. + */ regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ @@ -1061,13 +1168,15 @@ static void *early_enable_eeh(struct device_node *dn, void *data) pdn->eeh_config_addr = regs[0]; /* If the newer, better, ibm,get-config-addr-info is supported, - * then use that instead. */ + * then use that instead. + */ pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes * where EEH is not supported. Verify support - * explicitly. */ + * explicitly. + */ ret = read_slot_reset_state(pdn, rets); if ((ret == 0) && (rets[1] == 1)) enable = 1; @@ -1083,7 +1192,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data) } else { /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. */ + * EEH parent, in which case we mark it as supported. + */ if (dn->parent && PCI_DN(dn->parent) && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { /* Parent supports EEH. */ @@ -1101,7 +1211,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data) return NULL; } -/* +/** + * eeh_init - EEH initialization + * * Initialize EEH by trying to enable it for all of the adapters in the system. * As a side effect we can determine here if eeh is supported at all. 
* Note that we leave EEH on so failed config cycles won't cause a machine @@ -1133,7 +1245,7 @@ void __init eeh_init(void) ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); - ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); + ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) @@ -1170,7 +1282,7 @@ void __init eeh_init(void) } /** - * eeh_add_device_early - enable EEH for the indicated device_node + * eeh_add_device_early - Enable EEH for the indicated device_node * @dn: device node for which to set up EEH * * This routine must be used to perform EEH initialization for PCI @@ -1199,6 +1311,14 @@ static void eeh_add_device_early(struct device_node *dn) early_enable_eeh(dn, &info); } +/** + * eeh_add_device_tree_early - Enable EEH for the indicated device + * @dn: device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). + */ void eeh_add_device_tree_early(struct device_node *dn) { struct device_node *sib; @@ -1210,7 +1330,7 @@ void eeh_add_device_tree_early(struct device_node *dn) EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); /** - * eeh_add_device_late - perform EEH initialization for the indicated pci device + * eeh_add_device_late - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI @@ -1234,13 +1354,21 @@ static void eeh_add_device_late(struct pci_dev *dev) } WARN_ON(pdn->pcidev); - pci_dev_get (dev); + pci_dev_get(dev); pdn->pcidev = dev; pci_addr_cache_insert_device(dev); eeh_sysfs_add_device(dev); } +/** + * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus + * @bus: PCI bus + * + * This routine must be used to perform EEH initialization for PCI + * devices which are attached to the indicated PCI bus. The PCI bus + * is added after system boot through hotplug or dlpar. + */ void eeh_add_device_tree_late(struct pci_bus *bus) { struct pci_dev *dev; @@ -1257,7 +1385,7 @@ void eeh_add_device_tree_late(struct pci_bus *bus) EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); /** - * eeh_remove_device - undo EEH setup for the indicated pci device + * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed * * This routine should be called when a device is removed from @@ -1281,12 +1409,20 @@ static void eeh_remove_device(struct pci_dev *dev) return; } PCI_DN(dn)->pcidev = NULL; - pci_dev_put (dev); + pci_dev_put(dev); pci_addr_cache_remove_device(dev); eeh_sysfs_remove_device(dev); } +/** + * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device + * @dev: PCI device + * + * This routine must be called when a device is removed from the + * running system through hotplug or dlpar. The corresponding + * PCI address cache will be removed. + */ void eeh_remove_bus_device(struct pci_dev *dev) { struct pci_bus *bus = dev->subordinate; -- cgit v1.2.3-70-g09d2 From cce4b2d243ddd9e8d5da159b893117afd7ccad5c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:52 +0000 Subject: powerpc/eeh: Cleanup function names in the EEH core The EEH has been implemented on pSeries platform. The original code looks a little bit nasty. 
The patch does cleanup on the current EEH implementation so that it looks more clean. * Try adding prefix "eeh" for functions. * Some function names have been adjusted so that they looks shorter and meaningful. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 8 +-- arch/powerpc/platforms/pseries/eeh.c | 102 +++++++++++++--------------- arch/powerpc/platforms/pseries/eeh_driver.c | 10 +-- arch/powerpc/platforms/pseries/msi.c | 2 +- 4 files changed, 59 insertions(+), 63 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 221d82fd823..605a97000fc 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,16 +58,16 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr); void eeh_slot_error_detail (struct pci_dn *pdn, int severity); #define EEH_THAW_MMIO 2 #define EEH_THAW_DMA 3 -int rtas_pci_enable(struct pci_dn *pdn, int function); -int rtas_set_slot_reset (struct pci_dn *); +int eeh_pci_enable(struct pci_dn *pdn, int function); +int eeh_reset_pe(struct pci_dn *); int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); void eeh_restore_bars(struct pci_dn *); -void rtas_configure_bridge(struct pci_dn *); +void eeh_configure_bridge(struct pci_dn *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_mark_slot(struct device_node *dn, int mode_flag); void eeh_clear_slot(struct device_node *dn, int mode_flag); -struct device_node *find_device_pe(struct device_node *dn); +struct device_node *eeh_find_device_pe(struct device_node *dn); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 5f6d37bdd4f..fa885891e1c 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -130,7 +130,7 @@ static unsigned long slot_resets; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) /** - * rtas_slot_error_detail - Retrieve error log through RTAS call + * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call * @pdn: device node * @severity: temporary or permanent error log * @driver_log: driver log to be combined with the retrieved error log @@ -139,7 +139,7 @@ static unsigned long slot_resets; * This routine should be called to retrieve error log through the dedicated * RTAS call. */ -static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, +static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity, char *driver_log, size_t loglen) { int config_addr; @@ -170,7 +170,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, } /** - * gather_pci_data - Copy assorted PCI config space registers to buff + * eeh_gather_pci_data - Copy assorted PCI config space registers to buff * @pdn: device to report data for * @buf: point to buffer in which to log * @len: amount of room in buffer @@ -178,7 +178,7 @@ static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, * This routine captures assorted PCI configuration space data, * and puts them into a buffer for RTAS error logging. 
*/ -static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) +static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) { struct pci_dev *dev = pdn->pcidev; u32 cfg; @@ -258,7 +258,7 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) for_each_child_of_node(pdn->node, dn) { pdn = PCI_DN(dn); if (pdn) - n += gather_pci_data(pdn, buf+n, len-n); + n += eeh_gather_pci_data(pdn, buf+n, len-n); } } @@ -280,23 +280,23 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) size_t loglen = 0; pci_regs_buf[0] = 0; - rtas_pci_enable(pdn, EEH_THAW_MMIO); - rtas_configure_bridge(pdn); + eeh_pci_enable(pdn, EEH_THAW_MMIO); + eeh_configure_bridge(pdn); eeh_restore_bars(pdn); - loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); + loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); - rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); + eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); } /** - * read_slot_reset_state - Read the reset state of a device node's slot + * eeh_read_slot_reset_state - Read the reset state of a device node's slot * @dn: device node to read * @rets: array to return results in * * Read the reset state of a device node's slot through platform dependent * function call. */ -static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) +static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[]) { int token, outputs; int config_addr; @@ -332,15 +332,14 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) * the max allowed wait time is exceeded, in which case * a -2 is returned. */ -int -eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) +int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) { int rc; int rets[3]; int mwait; while (1) { - rc = read_slot_reset_state(pdn, rets); + rc = eeh_read_slot_reset_state(pdn, rets); if (rc) return rc; if (rets[1] == 0) return -1; /* EEH is not supported */ @@ -389,12 +388,12 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) } /** - * find_device_pe - Retrieve the PE for the given device + * eeh_find_device_pe - Retrieve the PE for the given device * @dn: device node * * Return the PE under which this device lies */ -struct device_node * find_device_pe(struct device_node *dn) +struct device_node *eeh_find_device_pe(struct device_node *dn) { while ((dn->parent) && PCI_DN(dn->parent) && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { @@ -445,7 +444,7 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag) void eeh_mark_slot(struct device_node *dn, int mode_flag) { struct pci_dev *dev; - dn = find_device_pe(dn); + dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -493,7 +492,7 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag) unsigned long flags; raw_spin_lock_irqsave(&confirm_error_lock, flags); - dn = find_device_pe(dn); + dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -538,7 +537,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) no_dn++; return 0; } - dn = find_device_pe(dn); + dn = eeh_find_device_pe(dn); pdn = PCI_DN(dn); /* Access to IO BARs might get this far and still not want checking. 
*/ @@ -585,11 +584,11 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * function zero of a multi-function device. * In any case they must share a common PHB. */ - ret = read_slot_reset_state(pdn, rets); + ret = eeh_read_slot_reset_state(pdn, rets); /* If the call to firmware failed, punt */ if (ret != 0) { - printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", + printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n", ret, dn->full_name); false_positives++; pdn->eeh_false_positives ++; @@ -687,15 +686,14 @@ EXPORT_SYMBOL(eeh_check_failure); /** - * rtas_pci_enable - Enable MMIO or DMA transfers for this slot + * eeh_pci_enable - Enable MMIO or DMA transfers for this slot * @pdn pci device node * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing * recovery or log collection on the indicated device. */ -int -rtas_pci_enable(struct pci_dn *pdn, int function) +int eeh_pci_enable(struct pci_dn *pdn, int function) { int config_addr; int rc; @@ -723,7 +721,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function) } /** - * rtas_pci_slot_reset - Raises/Lowers the pci #RST line + * eeh_slot_reset - Raises/Lowers the pci #RST line * @pdn: pci device node * @state: 1/0 to raise/lower the #RST * @@ -732,8 +730,7 @@ rtas_pci_enable(struct pci_dn *pdn, int function) * and drops the #RST line if 'state is '0'. This routine is * safe to call in an interrupt context. */ -static void -rtas_pci_slot_reset(struct pci_dn *pdn, int state) +static void eeh_slot_reset(struct pci_dn *pdn, int state) { int config_addr; int rc; @@ -786,13 +783,13 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: - rtas_pci_slot_reset(pdn, 0); + eeh_slot_reset(pdn, 0); break; case pcie_hot_reset: - rtas_pci_slot_reset(pdn, 1); + eeh_slot_reset(pdn, 1); break; case pcie_warm_reset: - rtas_pci_slot_reset(pdn, 3); + eeh_slot_reset(pdn, 3); break; default: return -EINVAL; @@ -839,7 +836,7 @@ void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) { struct pci_dev *dev; - dn = find_device_pe(dn); + dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) @@ -853,12 +850,12 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) } /** - * __rtas_set_slot_reset - Assert the pci #RST line for 1/4 second + * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second * @pdn: pci device node to be reset. * * Assert the PCI #RST line for 1/4 second. */ -static void __rtas_set_slot_reset(struct pci_dn *pdn) +static void eeh_reset_pe_once(struct pci_dn *pdn) { unsigned int freset = 0; @@ -871,9 +868,9 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) eeh_set_pe_freset(pdn->node, &freset); if (freset) - rtas_pci_slot_reset(pdn, 3); + eeh_slot_reset(pdn, 3); else - rtas_pci_slot_reset(pdn, 1); + eeh_slot_reset(pdn, 1); /* The PCI bus requires that the reset be held high for at least * a 100 milliseconds. We wait a bit longer 'just in case'. 
@@ -887,7 +884,7 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) */ eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); - rtas_pci_slot_reset(pdn, 0); + eeh_slot_reset(pdn, 0); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting @@ -898,20 +895,20 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) } /** - * rtas_set_slot_reset - Reset the indicated PE + * eeh_reset_pe - Reset the indicated PE * @pdn: PCI device node * * This routine should be called to reset indicated device, including * PE. A PE might include multiple PCI devices and sometimes PCI bridges * might be involved as well. */ -int rtas_set_slot_reset(struct pci_dn *pdn) +int eeh_reset_pe(struct pci_dn *pdn) { int i, rc; /* Take three shots at resetting the bus */ for (i=0; i<3; i++) { - __rtas_set_slot_reset(pdn); + eeh_reset_pe_once(pdn); rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); if (rc == 0) @@ -938,14 +935,14 @@ int rtas_set_slot_reset(struct pci_dn *pdn) */ /** - * __restore_bars - Restore the Base Address Registers + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device * @pdn: pci device node * * Loads the PCI configuration space base address registers, * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. */ -static inline void __restore_bars(struct pci_dn *pdn) +static inline void eeh_restore_one_device_bars(struct pci_dn *pdn) { int i; u32 cmd; @@ -999,7 +996,7 @@ void eeh_restore_bars(struct pci_dn *pdn) return; if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) - __restore_bars(pdn); + eeh_restore_one_device_bars(pdn); for_each_child_of_node(pdn->node, dn) eeh_restore_bars(PCI_DN(dn)); @@ -1026,15 +1023,14 @@ static void eeh_save_bars(struct pci_dn *pdn) } /** - * rtas_configure_bridge - Configure PCI bridges for the indicated PE + * eeh_configure_bridge - Configure PCI bridges for the indicated PE * @pdn: PCI device node * * PCI bridges might be included in PE. In order to make the PE work * again. The included PCI bridges should be recovered after the PE * encounters frozen state. */ -void -rtas_configure_bridge(struct pci_dn *pdn) +void eeh_configure_bridge(struct pci_dn *pdn) { int config_addr; int rc; @@ -1069,7 +1065,7 @@ struct eeh_early_enable_info { }; /** - * get_pe_addr - Retrieve PE address with given BDF address + * eeh_get_pe_addr - Retrieve PE address with given BDF address * @config_addr: BDF address * @info: BUID of the associated PHB * @@ -1079,7 +1075,7 @@ struct eeh_early_enable_info { * the given BDF address. Further more, we prefer PE address on BDF * address in EEH core components. */ -static int get_pe_addr(int config_addr, +static int eeh_get_pe_addr(int config_addr, struct eeh_early_enable_info *info) { unsigned int rets[3]; @@ -1112,7 +1108,7 @@ static int get_pe_addr(int config_addr, } /** - * early_enable_eeh - Early enable EEH on the indicated device + * eeh_early_enable - Early enable EEH on the indicated device * @dn: device node * @data: BUID * @@ -1120,7 +1116,7 @@ static int get_pe_addr(int config_addr, * is expected to be called before real PCI probing is done. However, * the PHBs have been initialized at this point. 
*/ -static void *early_enable_eeh(struct device_node *dn, void *data) +static void *eeh_early_enable(struct device_node *dn, void *data) { unsigned int rets[3]; struct eeh_early_enable_info *info = data; @@ -1170,14 +1166,14 @@ static void *early_enable_eeh(struct device_node *dn, void *data) /* If the newer, better, ibm,get-config-addr-info is supported, * then use that instead. */ - pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); + pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info); /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes * where EEH is not supported. Verify support * explicitly. */ - ret = read_slot_reset_state(pdn, rets); + ret = eeh_read_slot_reset_state(pdn, rets); if ((ret == 0) && (rets[1] == 1)) enable = 1; } @@ -1272,7 +1268,7 @@ void __init eeh_init(void) info.buid_lo = BUID_LO(buid); info.buid_hi = BUID_HI(buid); - traverse_pci_devices(phb, early_enable_eeh, &info); + traverse_pci_devices(phb, eeh_early_enable, &info); } if (eeh_subsystem_enabled) @@ -1308,7 +1304,7 @@ static void eeh_add_device_early(struct device_node *dn) info.buid_hi = BUID_HI(phb->buid); info.buid_lo = BUID_LO(phb->buid); - early_enable_eeh(dn, &info); + eeh_early_enable(dn, &info); } /** diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 1b6cb10589e..5315350bf1a 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -282,7 +282,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. 
*/ - rc = rtas_set_slot_reset(pe_dn); + rc = eeh_reset_pe(pe_dn); if (rc) return rc; @@ -295,7 +295,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) struct pci_dn *ppe = PCI_DN(dn); /* On Power4, always true because eeh_pe_config_addr=0 */ if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { - rtas_configure_bridge(ppe); + eeh_configure_bridge(ppe); eeh_restore_bars(ppe); } dn = dn->sibling; @@ -330,7 +330,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; - frozen_dn = find_device_pe(event->dn); + frozen_dn = eeh_find_device_pe(event->dn); if (!frozen_dn) { location = of_get_property(event->dn, "ibm,loc-code", NULL); @@ -422,7 +422,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO); + rc = eeh_pci_enable(frozen_pdn, EEH_THAW_MMIO); if (rc < 0) goto hard_fail; @@ -436,7 +436,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA); + rc = eeh_pci_enable(frozen_pdn, EEH_THAW_DMA); if (rc < 0) goto hard_fail; diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 38d24e7e7bb..109fdb75578 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -217,7 +217,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) if (!dn) return NULL; - dn = find_device_pe(dn); + dn = eeh_find_device_pe(dn); if (!dn) return NULL; -- cgit v1.2.3-70-g09d2 From aa1e6374ae11788752535ae0c8c6395c9cad1393 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:53 +0000 Subject: powerpc/eeh: Platform dependent EEH operations EEH has been implemented on RTAS-compliant pSeries platform. That's to say, the EEH operations will be implemented through RTAS calls eventually. The situation limited feasible extension on EEH. In order to support EEH on multiple platforms like pseries and powernv simutaneously. We have to split the platform dependent EEH options up out of current implementation. The patch addresses supporting EEH on multiple platforms. The pseries platform dependent EEH operations will be abstracted by struct eeh_ops. EEH core components will be built based on the registered EEH operations. With the mechanism, what the individual platform needs to do is implement platform dependent EEH operations. For now, the pseries platform is covered under the mechanism. That means we have to think about other platforms to support EEH, like powernv. Besides, we only have framework for the mechanism and we have to implement it for pseries platform later. 
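As a rough, non-authoritative sketch of how the new abstraction is meant to be consumed: struct eeh_ops and eeh_ops_register() are taken from the hunks below, while the "foo" platform and every foo_* name are invented purely for illustration.

#include <linux/init.h>
#include <linux/of.h>
#include <asm/eeh.h>

/*
 * Hypothetical "foo" platform glue, sketched only to show the eeh_ops
 * hook-up; the foo_* names are invented and not part of this series.
 */
static int foo_eeh_init(void)
{
        /* probe firmware services, map registers, allocate buffers, ... */
        return 0;
}

static int foo_eeh_set_option(struct device_node *dn, int option)
{
        /* translate the EEH_OPT_* request into a platform operation */
        return 0;
}

/* ...the remaining callbacks follow the same device_node-based pattern... */

static struct eeh_ops foo_eeh_ops = {
        .name           = "foo",
        .init           = foo_eeh_init,
        .set_option     = foo_eeh_set_option,
        /* .get_pe_addr, .get_state, .reset, .wait_state,
         * .get_log and .configure_bridge would be filled in likewise */
};

/* Called from platform setup code before eeh_init() runs */
static int __init foo_eeh_platform_init(void)
{
        return eeh_ops_register(&foo_eeh_ops);
}

The pseries backend registered later in this series follows exactly this pattern, filling in the callbacks with RTAS-based implementations.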
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 32 +++++ arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/eeh.c | 53 ++++++++ arch/powerpc/platforms/pseries/eeh_pseries.c | 183 +++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/setup.c | 1 + 5 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/pseries/eeh_pseries.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 232887721ff..0666c52b8f1 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -31,6 +31,26 @@ struct device_node; #ifdef CONFIG_EEH +/* + * The struct is used to trace the registered EEH operation + * callback functions. Actually, those operation callback + * functions are heavily platform dependent. That means the + * platform should register its own EEH operation callback + * functions before any EEH further operations. + */ +struct eeh_ops { + char *name; + int (*init)(void); + int (*set_option)(struct device_node *dn, int option); + int (*get_pe_addr)(struct device_node *dn); + int (*get_state)(struct device_node *dn, int *state); + int (*reset)(struct device_node *dn, int option); + int (*wait_state)(struct device_node *dn, int max_wait); + int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len); + int (*configure_bridge)(struct device_node *dn); +}; + +extern struct eeh_ops *eeh_ops; extern int eeh_subsystem_enabled; /* Values for eeh_mode bits in device_node */ @@ -47,6 +67,11 @@ extern int eeh_subsystem_enabled; #define EEH_MAX_ALLOWED_FREEZES 5 void __init eeh_init(void); +#ifdef CONFIG_PPC_PSERIES +int __init eeh_pseries_init(void); +#endif +int __init eeh_ops_register(struct eeh_ops *ops); +int __exit eeh_ops_unregister(const char *name); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev); @@ -73,6 +98,13 @@ void eeh_remove_bus_device(struct pci_dev *); #else /* !CONFIG_EEH */ static inline void eeh_init(void) { } +#ifdef CONFIG_PPC_PSERIES +static inline int eeh_pseries_init(void) +{ + return 0; +} +#endif /* CONFIG_PPC_PSERIES */ + static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) { return val; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 67b3e6e0cf3..ee873cf4fe4 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o +obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index fa885891e1c..b0e3fb0b32a 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -97,6 +97,9 @@ static int ibm_get_config_addr_info2; static int ibm_configure_bridge; static int ibm_configure_pe; +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; + int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); @@ 
-1207,6 +1210,56 @@ static void *eeh_early_enable(struct device_node *dn, void *data) return NULL; } +/** + * eeh_ops_register - Register platform dependent EEH operations + * @ops: platform dependent EEH operations + * + * Register the platform dependent EEH operation callback + * functions. The platform should call this function before + * any other EEH operations. + */ +int __init eeh_ops_register(struct eeh_ops *ops) +{ + if (!ops->name) { + pr_warning("%s: Invalid EEH ops name for %p\n", + __func__, ops); + return -EINVAL; + } + + if (eeh_ops && eeh_ops != ops) { + pr_warning("%s: EEH ops of platform %s already existing (%s)\n", + __func__, eeh_ops->name, ops->name); + return -EEXIST; + } + + eeh_ops = ops; + + return 0; +} + +/** + * eeh_ops_unregister - Unreigster platform dependent EEH operations + * @name: name of EEH platform operations + * + * Unregister the platform dependent EEH operation callback + * functions. + */ +int __exit eeh_ops_unregister(const char *name) +{ + if (!name || !strlen(name)) { + pr_warning("%s: Invalid EEH ops name\n", + __func__); + return -EINVAL; + } + + if (eeh_ops && !strcmp(eeh_ops->name, name)) { + eeh_ops = NULL; + return 0; + } + + return -EEXIST; +} + /** * eeh_init - EEH initialization * diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c new file mode 100644 index 00000000000..61a9050ba96 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -0,0 +1,183 @@ +/* + * The file intends to implement the platform dependent EEH operations on pseries. + * Actually, the pseries platform is built based on RTAS heavily. That means the + * pseries platform dependent EEH operations will be built on RTAS calls. The functions + * are devired from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has + * been done. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011. + * Copyright IBM Corporation 2001, 2005, 2006 + * Copyright Dave Engebretsen & Todd Inglett 2001 + * Copyright Linas Vepstas 2005, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/** + * pseries_eeh_init - EEH platform dependent initialization + * + * EEH platform dependent initialization on pseries. + */ +static int pseries_eeh_init(void) +{ + return 0; +} + +/** + * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @dn: device node + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. 
+ * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int pseries_eeh_set_option(struct device_node *dn, int option) +{ + return 0; +} + +/** + * pseries_eeh_get_pe_addr - Retrieve PE address + * @dn: device node + * + * Retrieve the assocated PE address. Actually, there're 2 RTAS + * function calls dedicated for the purpose. We need implement + * it through the new function and then the old one. Besides, + * you should make sure the config address is figured out from + * FDT node before calling the function. + * + * It's notable that zero'ed return value means invalid PE config + * address. + */ +static int pseries_eeh_get_pe_addr(struct device_node *dn) +{ + return 0; +} + +/** + * pseries_eeh_get_state - Retrieve PE state + * @dn: PE associated device node + * @state: return value + * + * Retrieve the state of the specified PE. On RTAS compliant + * pseries platform, there already has one dedicated RTAS function + * for the purpose. It's notable that the associated PE config address + * might be ready when calling the function. Therefore, endeavour to + * use the PE config address if possible. Further more, there're 2 + * RTAS calls for the purpose, we need to try the new one and back + * to the old one if the new one couldn't work properly. + */ +static int pseries_eeh_get_state(struct device_node *dn, int *state) +{ + return 0; +} + +/** + * pseries_eeh_reset - Reset the specified PE + * @dn: PE associated device node + * @option: reset option + * + * Reset the specified PE + */ +static int pseries_eeh_reset(struct device_node *dn, int option) +{ + return 0; +} + +/** + * pseries_eeh_wait_state - Wait for PE state + * @dn: PE associated device node + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) +{ + return 0; +} + +/** + * pseries_eeh_get_log - Retrieve error log + * @dn: device node + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + * Actually, the error will be retrieved through the dedicated + * RTAS call. + */ +static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) +{ + return 0; +} + +/** + * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @dn: PE associated device node + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pseries_eeh_configure_bridge(struct device_node *dn) +{ + return 0; +} + +static struct eeh_ops pseries_eeh_ops = { + .name = "pseries", + .init = pseries_eeh_init, + .set_option = pseries_eeh_set_option, + .get_pe_addr = pseries_eeh_get_pe_addr, + .get_state = pseries_eeh_get_state, + .reset = pseries_eeh_reset, + .wait_state = pseries_eeh_wait_state, + .get_log = pseries_eeh_get_log, + .configure_bridge = pseries_eeh_configure_bridge +}; + +/** + * eeh_pseries_init - Register platform dependent EEH operations + * + * EEH initialization on pseries platform. This function should be + * called before any EEH related functions. 
+ */ +int __init eeh_pseries_init(void) +{ + return eeh_ops_register(&pseries_eeh_ops); +} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index d928412cfb3..62b827626ca 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -381,6 +381,7 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); + eeh_pseries_init(); find_and_init_phbs(); pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); eeh_init(); -- cgit v1.2.3-70-g09d2 From 8fb8f709025c13ae72968a66a1ade24431a342b2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:55 +0000 Subject: powerpc/eeh: pseries platform EEH operations There're 4 EEH operations that are covered by the dedicated RTAS call : enable or disable EEH, enable MMIO and enable DMA. At early stage of system boot, the EEH would be tried to enable on PCI device related device node. MMIO and DMA for particular PE should be enabled when doing recovery on EEH errors so that the PE could function properly again. The patch implements it and abstract that through struct eeh_ops::set_eeh. It would be help for EEH to support multiple platforms in future. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 4 +++ arch/powerpc/include/asm/ppc-pci.h | 2 -- arch/powerpc/platforms/pseries/eeh.c | 26 +++---------------- arch/powerpc/platforms/pseries/eeh_driver.c | 4 +-- arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++++++++++- 5 files changed, 48 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 0666c52b8f1..76f7b3f739c 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -38,6 +38,10 @@ struct device_node; * platform should register its own EEH operation callback * functions before any EEH further operations. 
*/ +#define EEH_OPT_DISABLE 0 /* EEH disable */ +#define EEH_OPT_ENABLE 1 /* EEH enable */ +#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ +#define EEH_OPT_THAW_DMA 3 /* DMA enable */ struct eeh_ops { char *name; int (*init)(void); diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 605a97000fc..6150349ca9c 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -56,8 +56,6 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr); #define EEH_LOG_TEMP_FAILURE 1 #define EEH_LOG_PERM_FAILURE 2 void eeh_slot_error_detail (struct pci_dn *pdn, int severity); -#define EEH_THAW_MMIO 2 -#define EEH_THAW_DMA 3 int eeh_pci_enable(struct pci_dn *pdn, int function); int eeh_reset_pe(struct pci_dn *); int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index bb6de6c9791..70a9617cc6c 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -87,7 +87,6 @@ #define PCI_BUS_RESET_WAIT_MSEC (60*1000) /* RTAS tokens */ -static int ibm_set_eeh_option; static int ibm_set_slot_reset; static int ibm_read_slot_reset_state; static int ibm_read_slot_reset_state2; @@ -283,7 +282,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) size_t loglen = 0; pci_regs_buf[0] = 0; - eeh_pci_enable(pdn, EEH_THAW_MMIO); + eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO); eeh_configure_bridge(pdn); eeh_restore_bars(pdn); loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); @@ -698,26 +697,15 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct pci_dn *pdn, int function) { - int config_addr; int rc; - /* Use PE configuration address, if present */ - config_addr = pdn->eeh_config_addr; - if (pdn->eeh_pe_config_addr) - config_addr = pdn->eeh_pe_config_addr; - - rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), - function); - + rc = eeh_ops->set_option(pdn->node, function); if (rc) printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", function, rc, pdn->node->full_name); rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); - if ((rc == 4) && (function == EEH_THAW_MMIO)) + if ((rc == 4) && (function == EEH_OPT_THAW_MMIO)) return 0; return rc; @@ -1158,9 +1146,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data) if (regs) { /* First register entry is addr (00BBSS00) */ /* Try to enable eeh */ - ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - regs[0], info->buid_hi, info->buid_lo, - EEH_ENABLE); + ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE); enable = 0; if (ret == 0) { @@ -1299,7 +1285,6 @@ void __init eeh_init(void) if (np == NULL) return; - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); @@ -1309,9 +1294,6 @@ void __init eeh_init(void) ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) - return; - eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { eeh_error_buf_size = 1024; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 5315350bf1a..02eab3baf96 100644 
--- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -422,7 +422,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_pdn, EEH_THAW_MMIO); + rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; @@ -436,7 +436,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_pdn, EEH_THAW_DMA); + rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1a9410a2d45..c48a9e6ecdd 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -121,7 +121,44 @@ static int pseries_eeh_init(void) */ static int pseries_eeh_set_option(struct device_node *dn, int option) { - return 0; + int ret = 0; + struct pci_dn *pdn; + const u32 *reg; + int config_addr; + + pdn = PCI_DN(dn); + + /* + * When we're enabling or disabling EEH functioality on + * the particular PE, the PE config address is possibly + * unavailable. Therefore, we have to figure it out from + * the FDT node. + */ + switch (option) { + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + reg = of_get_property(dn, "reg", NULL); + config_addr = reg[0]; + break; + + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + break; + + default: + pr_err("%s: Invalid option %d\n", + __func__, option); + return -EINVAL; + } + + ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), option); + + return ret; } /** -- cgit v1.2.3-70-g09d2 From eb594a4754e71e41c048e0f1559bb6f13dab7070 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:57 +0000 Subject: powerpc/eeh: pseries platform PE state retrieval On pSeries platform, there're 2 dedicated RTAS calls introduced to retrieve the corresponding PE's state: ibm,read-slot-reset-state and ibm,read-slot-reset-state2. The patch implements the retrieval of PE's state according to the given PE address. Besides, the implementation has been abstracted by struct eeh_ops::get_state so that EEH core components could support multiple platforms in future. 
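Before the diff, a hedged illustration of how a caller is expected to interpret the new return convention; the pe_is_usable() helper is invented here, but the EEH_STATE_* flags and the MMIO/DMA test mirror the eeh_dn_check_failure() change in the hunks below.

#include <linux/of.h>
#include <linux/types.h>
#include <asm/eeh.h>

/*
 * Illustration only: interpret the bitmask returned by
 * eeh_ops->get_state(). The helper name is made up for this sketch.
 */
static bool pe_is_usable(struct device_node *dn)
{
        int state = eeh_ops->get_state(dn, NULL);

        if (state < 0 || state == EEH_STATE_NOT_SUPPORT)
                return false;

        /* A healthy PE has both MMIO and DMA active */
        return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
               (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
}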
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 8 +++ arch/powerpc/platforms/pseries/eeh.c | 96 +++++----------------------- arch/powerpc/platforms/pseries/eeh_driver.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 70 +++++++++++++++++++- 4 files changed, 94 insertions(+), 82 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 76f7b3f739c..1d3c9e5fa08 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -42,6 +42,14 @@ struct device_node; #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ +#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ +#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ +#define EEH_STATE_MMIO_ACTIVE (1 << 3) /* Active MMIO */ +#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ +#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ +#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ + struct eeh_ops { char *name; int (*init)(void); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 00797e079f4..8d11f1f1732 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -88,8 +88,6 @@ /* RTAS tokens */ static int ibm_set_slot_reset; -static int ibm_read_slot_reset_state; -static int ibm_read_slot_reset_state2; static int ibm_slot_error_detail; static int ibm_configure_bridge; static int ibm_configure_pe; @@ -288,37 +286,6 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); } -/** - * eeh_read_slot_reset_state - Read the reset state of a device node's slot - * @dn: device node to read - * @rets: array to return results in - * - * Read the reset state of a device node's slot through platform dependent - * function call. 
- */ -static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[]) -{ - int token, outputs; - int config_addr; - - if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { - token = ibm_read_slot_reset_state2; - outputs = 4; - } else { - token = ibm_read_slot_reset_state; - rets[2] = 0; /* fake PE Unavailable info */ - outputs = 3; - } - - /* Use PE configuration address, if present */ - config_addr = pdn->eeh_config_addr; - if (pdn->eeh_pe_config_addr) - config_addr = pdn->eeh_pe_config_addr; - - return rtas_call(token, 3, outputs, rets, config_addr, - BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); -} - /** * eeh_wait_for_slot_status - Returns error status of slot * @pdn: pci device node @@ -335,21 +302,15 @@ static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[]) int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) { int rc; - int rets[3]; int mwait; while (1) { - rc = eeh_read_slot_reset_state(pdn, rets); - if (rc) return rc; - if (rets[1] == 0) return -1; /* EEH is not supported */ - - if (rets[0] != 5) return rets[0]; /* return actual status */ - - if (rets[2] == 0) return -1; /* permanently unavailable */ + rc = eeh_ops->get_state(pdn->node, &mwait); + if (rc != EEH_STATE_UNAVAILABLE) + return rc; if (max_wait_msecs <= 0) break; - mwait = rets[2]; if (mwait <= 0) { printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n", mwait); @@ -522,7 +483,6 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag) int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) { int ret; - int rets[3]; unsigned long flags; struct pci_dn *pdn; int rc = 0; @@ -584,40 +544,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * function zero of a multi-function device. * In any case they must share a common PHB. */ - ret = eeh_read_slot_reset_state(pdn, rets); - - /* If the call to firmware failed, punt */ - if (ret != 0) { - printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n", - ret, dn->full_name); - false_positives++; - pdn->eeh_false_positives ++; - rc = 0; - goto dn_unlock; - } + ret = eeh_ops->get_state(pdn->node, NULL); /* Note that config-io to empty slots may fail; * they are empty when they don't have children. + * We will punt with the following conditions: Failure to get + * PE's state, EEH not support and Permanently unavailable + * state, PE is in good state. */ - if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) { - false_positives++; - pdn->eeh_false_positives ++; - rc = 0; - goto dn_unlock; - } - - /* If EEH is not supported on this device, punt. */ - if (rets[1] != 1) { - printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", - ret, dn->full_name); - false_positives++; - pdn->eeh_false_positives ++; - rc = 0; - goto dn_unlock; - } - - /* If not the kind of error we know about, punt. 
*/ - if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { + if ((ret < 0) || + (ret == EEH_STATE_NOT_SUPPORT) || + (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { false_positives++; pdn->eeh_false_positives ++; rc = 0; @@ -703,7 +641,8 @@ int eeh_pci_enable(struct pci_dn *pdn, int function) function, rc, pdn->node->full_name); rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); - if ((rc == 4) && (function == EEH_OPT_THAW_MMIO)) + if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && + (function == EEH_OPT_THAW_MMIO)) return 0; return rc; @@ -900,7 +839,7 @@ int eeh_reset_pe(struct pci_dn *pdn) eeh_reset_pe_once(pdn); rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); - if (rc == 0) + if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) return 0; if (rc < 0) { @@ -1057,7 +996,6 @@ void eeh_configure_bridge(struct pci_dn *pdn) */ static void *eeh_early_enable(struct device_node *dn, void *data) { - unsigned int rets[3]; int ret; const u32 *class_code = of_get_property(dn, "class-code", NULL); const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); @@ -1109,8 +1047,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data) * where EEH is not supported. Verify support * explicitly. */ - ret = eeh_read_slot_reset_state(pdn, rets); - if ((ret == 0) && (rets[1] == 1)) + ret = eeh_ops->get_state(pdn->node, NULL); + if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; } @@ -1232,8 +1170,6 @@ void __init eeh_init(void) return; ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 02eab3baf96..4c6e0c1cb1d 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -397,7 +397,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. 
*/ rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0) { + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 2b9543a7823..39567b262de 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -238,7 +238,75 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) */ static int pseries_eeh_get_state(struct device_node *dn, int *state) { - return 0; + struct pci_dn *pdn; + int config_addr; + int ret; + int rets[4]; + int result; + + /* Figure out PE config address if possible */ + pdn = PCI_DN(dn); + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { + /* Fake PE unavailable info */ + rets[2] = 0; + ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + } else { + return EEH_STATE_NOT_SUPPORT; + } + + if (ret) + return ret; + + /* Parse the result out */ + result = 0; + if (rets[1]) { + switch(rets[0]) { + case 0: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + break; + case 1: + result |= EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + break; + case 2: + result &= ~EEH_STATE_RESET_ACTIVE; + result &= ~EEH_STATE_MMIO_ACTIVE; + result &= ~EEH_STATE_DMA_ACTIVE; + break; + case 4: + result &= ~EEH_STATE_RESET_ACTIVE; + result &= ~EEH_STATE_MMIO_ACTIVE; + result &= ~EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + break; + case 5: + if (rets[2]) { + if (state) *state = rets[2]; + result = EEH_STATE_UNAVAILABLE; + } else { + result = EEH_STATE_NOT_SUPPORT; + } + default: + result = EEH_STATE_NOT_SUPPORT; + } + } else { + result = EEH_STATE_NOT_SUPPORT; + } + + return result; } /** -- cgit v1.2.3-70-g09d2 From b0e5f742f1816bebedabeb6844be1a598bd50a91 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:58 +0000 Subject: powerpc/eeh: pseries platform EEH wait PE state On pSeries platform, the PE state might be temporarily unavailable. In that case, the firmware will return the corresponding wait time. That means the kernel has to wait for appropriate time in order to get the PE state. The patch does the implementation for that. Besides, the function has been abstracted through struct eeh_ops::wait_state so that EEH core components could support multiple platforms in future. 
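As a hedged caller-side sketch (the example_wait_for_pe() helper is invented; the error handling mirrors the handle_eeh_events() change below), recovery code no longer polls RTAS directly but simply asks the registered backend to wait:

#include <linux/errno.h>
#include <linux/of.h>
#include <asm/eeh.h>

/*
 * Sketch only: wait until the PE reports a usable state or the timeout
 * (in milliseconds) expires, then hand back the EEH_STATE_* bitmask.
 */
static int example_wait_for_pe(struct device_node *dn, int timeout_ms)
{
        int rc = eeh_ops->wait_state(dn, timeout_ms);

        if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT)
                return -EIO;            /* treat as a permanent failure */

        return rc;                      /* bitmask of EEH_STATE_* flags */
}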
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 1 - arch/powerpc/platforms/pseries/eeh.c | 46 ++------------------------- arch/powerpc/platforms/pseries/eeh_driver.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 47 +++++++++++++++++++++++++++- 4 files changed, 49 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 6150349ca9c..1cfb2b09bbd 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,7 +58,6 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr); void eeh_slot_error_detail (struct pci_dn *pdn, int severity); int eeh_pci_enable(struct pci_dn *pdn, int function); int eeh_reset_pe(struct pci_dn *); -int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); void eeh_restore_bars(struct pci_dn *); void eeh_configure_bridge(struct pci_dn *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 8d11f1f1732..b5b03d41fb4 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -286,48 +286,6 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); } -/** - * eeh_wait_for_slot_status - Returns error status of slot - * @pdn: pci device node - * @max_wait_msecs: maximum number to millisecs to wait - * - * Return negative value if a permanent error, else return - * Partition Endpoint (PE) status value. - * - * If @max_wait_msecs is positive, then this routine will - * sleep until a valid status can be obtained, or until - * the max allowed wait time is exceeded, in which case - * a -2 is returned. - */ -int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) -{ - int rc; - int mwait; - - while (1) { - rc = eeh_ops->get_state(pdn->node, &mwait); - if (rc != EEH_STATE_UNAVAILABLE) - return rc; - - if (max_wait_msecs <= 0) break; - - if (mwait <= 0) { - printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n", - mwait); - mwait = 1000; - } else if (mwait > 300*1000) { - printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n", - mwait); - mwait = 300*1000; - } - max_wait_msecs -= mwait; - msleep(mwait); - } - - printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); - return -2; -} - /** * eeh_token_to_phys - Convert EEH address token to phys address * @token: I/O token, should be address in the form 0xA.... 
@@ -640,7 +598,7 @@ int eeh_pci_enable(struct pci_dn *pdn, int function) printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", function, rc, pdn->node->full_name); - rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && (function == EEH_OPT_THAW_MMIO)) return 0; @@ -838,7 +796,7 @@ int eeh_reset_pe(struct pci_dn *pdn) for (i=0; i<3; i++) { eeh_reset_pe_once(pdn); - rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) return 0; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 4c6e0c1cb1d..584defe1493 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -396,7 +396,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ - rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000); + rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 39567b262de..7b60131efe9 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -331,7 +331,52 @@ static int pseries_eeh_reset(struct device_node *dn, int option) */ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) { - return 0; + int ret; + int mwait; + + /* + * According to PAPR, the state of PE might be temporarily + * unavailable. Under the circumstance, we have to wait + * for indicated time determined by firmware. The maximal + * wait time is 5 minutes, which is acquired from the original + * EEH implementation. Also, the original implementation + * also defined the minimal wait time as 1 second. + */ +#define EEH_STATE_MIN_WAIT_TIME (1000) +#define EEH_STATE_MAX_WAIT_TIME (300 * 1000) + + while (1) { + ret = pseries_eeh_get_state(dn, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. Otherwise, + * the PE's state will be returned immediately. + */ + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + if (max_wait <= 0) { + pr_warning("%s: Timeout when getting PE's state (%d)\n", + __func__, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + if (mwait <= 0) { + pr_warning("%s: Firmware returned bad wait value %d\n", + __func__, mwait); + mwait = EEH_STATE_MIN_WAIT_TIME; + } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { + pr_warning("%s: Firmware returned too long wait value %d\n", + __func__, mwait); + mwait = EEH_STATE_MAX_WAIT_TIME; + } + + max_wait -= mwait; + msleep(mwait); + } + + return EEH_STATE_NOT_SUPPORT; } /** -- cgit v1.2.3-70-g09d2 From 2652481f75186940c4608f68c9fd76b32ec9b159 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:03:59 +0000 Subject: powerpc/eeh: pseries platform EEH reset PE On RTAS compliant pSeries platform, there is a dedicated RTAS call (ibm,set-slot-reset) to reset the specified PE. Furthermore, two types of resets are supported: hot and fundamental. 
the type of reset is to be used actually depends on the included PCI device's requirements. The patch implements resetting PE on pSeries platform through RTAS call. Besides, it has been abstracted through struct eeh_ops::reset so that EEH core components could support multiple platforms in future. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 3 ++ arch/powerpc/platforms/pseries/eeh.c | 63 +++------------------------- arch/powerpc/platforms/pseries/eeh_pseries.c | 25 ++++++++++- 3 files changed, 33 insertions(+), 58 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 1d3c9e5fa08..894ea6c662c 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -49,6 +49,9 @@ struct device_node; #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ +#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ +#define EEH_RESET_HOT 1 /* Hot reset */ +#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ struct eeh_ops { char *name; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index b5b03d41fb4..4f329f54881 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -87,7 +87,6 @@ #define PCI_BUS_RESET_WAIT_MSEC (60*1000) /* RTAS tokens */ -static int ibm_set_slot_reset; static int ibm_slot_error_detail; static int ibm_configure_bridge; static int ibm_configure_pe; @@ -606,54 +605,6 @@ int eeh_pci_enable(struct pci_dn *pdn, int function) return rc; } -/** - * eeh_slot_reset - Raises/Lowers the pci #RST line - * @pdn: pci device node - * @state: 1/0 to raise/lower the #RST - * - * Clear the EEH-frozen condition on a slot. This routine - * asserts the PCI #RST line if the 'state' argument is '1', - * and drops the #RST line if 'state is '0'. This routine is - * safe to call in an interrupt context. 
- */ -static void eeh_slot_reset(struct pci_dn *pdn, int state) -{ - int config_addr; - int rc; - - BUG_ON(pdn==NULL); - - if (!pdn->phb) { - printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", - pdn->node->full_name); - return; - } - - /* Use PE configuration address, if present */ - config_addr = pdn->eeh_config_addr; - if (pdn->eeh_pe_config_addr) - config_addr = pdn->eeh_pe_config_addr; - - rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), - state); - - /* Fundamental-reset not supported on this PE, try hot-reset */ - if (rc == -8 && state == 3) { - rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), 1); - if (rc) - printk(KERN_WARNING - "EEH: Unable to reset the failed slot," - " #RST=%d dn=%s\n", - rc, pdn->node->full_name); - } -} - /** * pcibios_set_pcie_slot_reset - Set PCI-E reset state * @dev: pci device struct @@ -665,17 +616,16 @@ static void eeh_slot_reset(struct pci_dn *pdn, int state) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct device_node *dn = pci_device_to_OF_node(dev); - struct pci_dn *pdn = PCI_DN(dn); switch (state) { case pcie_deassert_reset: - eeh_slot_reset(pdn, 0); + eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); break; case pcie_hot_reset: - eeh_slot_reset(pdn, 1); + eeh_ops->reset(dn, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_slot_reset(pdn, 3); + eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); break; default: return -EINVAL; @@ -754,9 +704,9 @@ static void eeh_reset_pe_once(struct pci_dn *pdn) eeh_set_pe_freset(pdn->node, &freset); if (freset) - eeh_slot_reset(pdn, 3); + eeh_ops->reset(pdn->node, EEH_RESET_FUNDAMENTAL); else - eeh_slot_reset(pdn, 1); + eeh_ops->reset(pdn->node, EEH_RESET_HOT); /* The PCI bus requires that the reset be held high for at least * a 100 milliseconds. We wait a bit longer 'just in case'. 
@@ -770,7 +720,7 @@ static void eeh_reset_pe_once(struct pci_dn *pdn) */ eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); - eeh_slot_reset(pdn, 0); + eeh_ops->reset(pdn->node, EEH_RESET_DEACTIVATE); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting @@ -1127,7 +1077,6 @@ void __init eeh_init(void) if (np == NULL) return; - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 7b60131efe9..6643e0677e9 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -318,7 +318,30 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state) */ static int pseries_eeh_reset(struct device_node *dn, int option) { - return 0; + struct pci_dn *pdn; + int config_addr; + int ret; + + /* Figure out PE address */ + pdn = PCI_DN(dn); + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && + ret == -8) { + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), EEH_RESET_HOT); + } + + return ret; } /** -- cgit v1.2.3-70-g09d2 From 8d633291b4fc0539ecad31f972447104be6953ec Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:00 +0000 Subject: powerpc/eeh: pseries platform EEH error log retrieval On RTAS compliant pSeries platform, one dedicated RTAS call has been introduced to retrieve EEH temporary or permanent error log. The patch implements the function of retriving EEH error log through RTAS call. Besides, it has been abstracted by struct eeh_ops::get_log so that EEH core components could support multiple platforms in future. 
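A minimal sketch of the caller side, assuming an invented helper name and buffer size; the call shape matches the eeh_slot_error_detail() change in the hunks below, where driver-collected register data is combined with the firmware log by the platform backend.

#include <linux/of.h>
#include <linux/types.h>
#include <asm/eeh.h>

/*
 * Illustration only: the core hands the backend a buffer of
 * driver-collected data and a severity; the platform merges it with the
 * firmware error log. Helper name and buffer size are assumptions.
 */
static void example_log_pe_error(struct device_node *dn, bool permanent)
{
        static char drv_log[2048];

        eeh_ops->get_log(dn, permanent ? EEH_LOG_PERM : EEH_LOG_TEMP,
                         drv_log, sizeof(drv_log));
}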
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 2 + arch/powerpc/include/asm/ppc-pci.h | 2 - arch/powerpc/platforms/pseries/eeh.c | 63 +--------------------------- arch/powerpc/platforms/pseries/eeh_driver.c | 4 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 47 ++++++++++++++++++++- 5 files changed, 51 insertions(+), 67 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 894ea6c662c..ad8f31834e0 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -52,6 +52,8 @@ struct device_node; #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ +#define EEH_LOG_TEMP 1 /* EEH temporary error log */ +#define EEH_LOG_PERM 2 /* EEH permanent error log */ struct eeh_ops { char *name; diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 1cfb2b09bbd..bd1a84f6529 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,8 +53,6 @@ void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); void pci_addr_cache_build(void); struct pci_dev *pci_get_device_by_addr(unsigned long addr); -#define EEH_LOG_TEMP_FAILURE 1 -#define EEH_LOG_PERM_FAILURE 2 void eeh_slot_error_detail (struct pci_dn *pdn, int severity); int eeh_pci_enable(struct pci_dn *pdn, int function); int eeh_reset_pe(struct pci_dn *); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 4f329f54881..39fcecb1c16 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -87,7 +87,6 @@ #define PCI_BUS_RESET_WAIT_MSEC (60*1000) /* RTAS tokens */ -static int ibm_slot_error_detail; static int ibm_configure_bridge; static int ibm_configure_pe; @@ -100,14 +99,6 @@ EXPORT_SYMBOL(eeh_subsystem_enabled); /* Lock to avoid races due to multiple reports of an error */ static DEFINE_RAW_SPINLOCK(confirm_error_lock); -/* Buffer for reporting slot-error-detail rtas calls. Its here - * in BSS, and not dynamically alloced, so that it ends up in - * RMO where RTAS can access it. - */ -static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; -static DEFINE_SPINLOCK(slot_errbuf_lock); -static int eeh_error_buf_size; - /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. @@ -126,46 +117,6 @@ static unsigned long slot_resets; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) -/** - * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call - * @pdn: device node - * @severity: temporary or permanent error log - * @driver_log: driver log to be combined with the retrieved error log - * @loglen: length of driver log - * - * This routine should be called to retrieve error log through the dedicated - * RTAS call. 
- */ -static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity, - char *driver_log, size_t loglen) -{ - int config_addr; - unsigned long flags; - int rc; - - /* Log the error with the rtas logger */ - spin_lock_irqsave(&slot_errbuf_lock, flags); - memset(slot_errbuf, 0, eeh_error_buf_size); - - /* Use PE configuration address, if present */ - config_addr = pdn->eeh_config_addr; - if (pdn->eeh_pe_config_addr) - config_addr = pdn->eeh_pe_config_addr; - - rc = rtas_call(ibm_slot_error_detail, - 8, 1, NULL, config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), - virt_to_phys(driver_log), loglen, - virt_to_phys(slot_errbuf), - eeh_error_buf_size, - severity); - - if (rc == 0) - log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); - spin_unlock_irqrestore(&slot_errbuf_lock, flags); -} - /** * eeh_gather_pci_data - Copy assorted PCI config space registers to buff * @pdn: device to report data for @@ -282,7 +233,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) eeh_restore_bars(pdn); loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); - eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); + eeh_ops->get_log(pdn->node, severity, pci_regs_buf, loglen); } /** @@ -1071,26 +1022,14 @@ void __init eeh_init(void) } raw_spin_lock_init(&confirm_error_lock); - spin_lock_init(&slot_errbuf_lock); np = of_find_node_by_path("/rtas"); if (np == NULL) return; - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); ibm_configure_bridge = rtas_token("ibm,configure-bridge"); ibm_configure_pe = rtas_token("ibm,configure-pe"); - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - eeh_error_buf_size = 1024; - } - if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " - "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } - /* Enable EEH for all adapters. Note that eeh requires buid's */ for (phb = of_find_node_by_name(NULL, "pci"); phb; phb = of_find_node_by_name(phb, "pci")) { diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 584defe1493..68403573a1f 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -406,7 +406,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) * don't post the error log until after all dev drivers * have been informed. */ - eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE); + eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they @@ -497,7 +497,7 @@ hard_fail: location, drv_str, pci_str); perm_error: - eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE); + eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ pci_walk_bus(frozen_bus, eeh_report_failure, NULL); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 6643e0677e9..7c8434f902b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -56,6 +56,15 @@ static int ibm_get_config_addr_info2; static int ibm_configure_bridge; static int ibm_configure_pe; +/* + * Buffer for reporting slot-error-detail rtas calls. 
Its here + * in BSS, and not dynamically alloced, so that it ends up in + * RMO where RTAS can access it. + */ +static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(slot_errbuf_lock); +static int eeh_error_buf_size; + /** * pseries_eeh_init - EEH platform dependent initialization * @@ -107,6 +116,19 @@ static int pseries_eeh_init(void) return -EINVAL; } + /* Initialize error log lock and size */ + spin_lock_init(&slot_errbuf_lock); + eeh_error_buf_size = rtas_token("rtas-error-log-max"); + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { + pr_warning("%s: unknown EEH error log size\n", + __func__); + eeh_error_buf_size = 1024; + } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { + pr_warning("%s: EEH error log size %d exceeds the maximal %d\n", + __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; + } + return 0; } @@ -415,7 +437,30 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) */ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) { - return 0; + struct pci_dn *pdn; + int config_addr; + unsigned long flags; + int ret; + + pdn = PCI_DN(dn); + spin_lock_irqsave(&slot_errbuf_lock, flags); + memset(slot_errbuf, 0, eeh_error_buf_size); + + /* Figure out the PE address */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, + BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), + virt_to_phys(drv_log), len, + virt_to_phys(slot_errbuf), eeh_error_buf_size, + severity); + if (!ret) + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); + spin_unlock_irqrestore(&slot_errbuf_lock, flags); + + return ret; } /** -- cgit v1.2.3-70-g09d2 From 1823fbf119e434dd3fb8eb7482613994c09d8527 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:01 +0000 Subject: powerpc/eeh: pseries platform EEH configure bridge In order to enable particular PCI device, which has been included in the parent PE. The involved PCI bridges should be enabled explicitly if there has. On pSeries platform, there're dedicated RTAS calls to fulfil the purpose. The patch implements the function of configuring PCI bridges through the dedicated RTAS calls. Besides, the function has been abstracted by struct eeh_ops::configure_bridge so that the EEH core components could support multiple platforms in future. 
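A hedged sketch of the recovery-side usage, loosely following the eeh_reset_device() change below; the helper name and the chosen includes are assumptions made for illustration.

#include <linux/of.h>
#include <asm/eeh.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>

/*
 * Illustration only: after a PE reset, walk the device nodes that share
 * the frozen PE and ask the backend to reconfigure any bridges before
 * the saved BARs are restored.
 */
static void example_reconfigure_pe(struct pci_dn *frozen, struct device_node *first)
{
        struct device_node *dn;

        for (dn = first; dn; dn = dn->sibling) {
                struct pci_dn *pdn = PCI_DN(dn);

                /* Only touch devices that belong to the same PE */
                if (pdn && pdn->eeh_pe_config_addr == frozen->eeh_pe_config_addr) {
                        eeh_ops->configure_bridge(dn);
                        eeh_restore_bars(pdn);
                }
        }
}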
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 1 - arch/powerpc/platforms/pseries/eeh.c | 44 +--------------------------- arch/powerpc/platforms/pseries/eeh_driver.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 29 +++++++++++++++++- 4 files changed, 30 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index bd1a84f6529..b4b18d82b79 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -57,7 +57,6 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity); int eeh_pci_enable(struct pci_dn *pdn, int function); int eeh_reset_pe(struct pci_dn *); void eeh_restore_bars(struct pci_dn *); -void eeh_configure_bridge(struct pci_dn *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_mark_slot(struct device_node *dn, int mode_flag); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 39fcecb1c16..bd4ed83863f 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -86,10 +86,6 @@ /* Time to wait for a PCI slot to report status, in milliseconds */ #define PCI_BUS_RESET_WAIT_MSEC (60*1000) -/* RTAS tokens */ -static int ibm_configure_bridge; -static int ibm_configure_pe; - /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; @@ -229,7 +225,7 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) pci_regs_buf[0] = 0; eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO); - eeh_configure_bridge(pdn); + eeh_ops->configure_bridge(pdn->node); eeh_restore_bars(pdn); loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); @@ -809,41 +805,6 @@ static void eeh_save_bars(struct pci_dn *pdn) rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); } -/** - * eeh_configure_bridge - Configure PCI bridges for the indicated PE - * @pdn: PCI device node - * - * PCI bridges might be included in PE. In order to make the PE work - * again. The included PCI bridges should be recovered after the PE - * encounters frozen state. - */ -void eeh_configure_bridge(struct pci_dn *pdn) -{ - int config_addr; - int rc; - int token; - - /* Use PE configuration address, if present */ - config_addr = pdn->eeh_config_addr; - if (pdn->eeh_pe_config_addr) - config_addr = pdn->eeh_pe_config_addr; - - /* Use new configure-pe function, if supported */ - if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) - token = ibm_configure_pe; - else - token = ibm_configure_bridge; - - rc = rtas_call(token, 3, 1, NULL, - config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid)); - if (rc) { - printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", - rc, pdn->node->full_name); - } -} - /** * eeh_early_enable - Early enable EEH on the indicated device * @dn: device node @@ -1027,9 +988,6 @@ void __init eeh_init(void) if (np == NULL) return; - ibm_configure_bridge = rtas_token("ibm,configure-bridge"); - ibm_configure_pe = rtas_token("ibm,configure-pe"); - /* Enable EEH for all adapters. 
Note that eeh requires buid's */ for (phb = of_find_node_by_name(NULL, "pci"); phb; phb = of_find_node_by_name(phb, "pci")) { diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 68403573a1f..61450e1b3f7 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -295,7 +295,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) struct pci_dn *ppe = PCI_DN(dn); /* On Power4, always true because eeh_pe_config_addr=0 */ if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { - eeh_configure_bridge(ppe); + eeh_ops->configure_bridge(dn); eeh_restore_bars(ppe); } dn = dn->sibling; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 7c8434f902b..4ed06b24ba4 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -473,7 +473,34 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l */ static int pseries_eeh_configure_bridge(struct device_node *dn) { - return 0; + struct pci_dn *pdn; + int config_addr; + int ret; + + /* Figure out the PE address */ + pdn = PCI_DN(dn); + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, + config_addr, BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid)); + } else { + return -EFAULT; + } + + if (ret) + pr_warning("%s: Unable to configure bridge %d for %s\n", + __func__, ret, dn->full_name); + + return ret; } static struct eeh_ops pseries_eeh_ops = { -- cgit v1.2.3-70-g09d2 From 29f8bf1b7f79f689f33e9c4679d182a68b6a5b2c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:02 +0000 Subject: powerpc/pseries: Cleanup comments in EEH aux components There're several EEH aux components and the patch does some cleanup for them so that they look more clean. * Duplicated comments have been removed from the header file. * Comments have been reorganized so that it looks more clean. * The leading comments of functions are adjusted for a little bit so that the result of "make pdfdocs" would be more unified. * Function calls "xxx ()" has been replaced by "xxx()". 
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh_event.h | 34 ++----- arch/powerpc/platforms/pseries/eeh_cache.c | 9 +- arch/powerpc/platforms/pseries/eeh_driver.c | 136 +++++++++++++++++----------- arch/powerpc/platforms/pseries/eeh_event.c | 23 +++-- arch/powerpc/platforms/pseries/eeh_sysfs.c | 2 +- 5 files changed, 107 insertions(+), 97 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index cc3cb04539a..25ebf6a5aa5 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -1,6 +1,4 @@ /* - * eeh_event.h - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -22,32 +20,20 @@ #define ASM_POWERPC_EEH_EVENT_H #ifdef __KERNEL__ -/** EEH event -- structure holding pci controller data that describes - * a change in the isolation status of a PCI slot. A pointer - * to this struct is passed as the data pointer in a notify callback. +/* + * structure holding pci controller data that describes a + * change in the isolation status of a PCI slot. A pointer + * to this struct is passed as the data pointer in a notify + * callback. */ struct eeh_event { - struct list_head list; - struct device_node *dn; /* struct device node */ - struct pci_dev *dev; /* affected device */ + struct list_head list; /* to form event queue */ + struct device_node *dn; /* struct device node */ + struct pci_dev *dev; /* affected device */ }; -/** - * eeh_send_failure_event - generate a PCI error event - * @dev pci device - * - * This routine builds a PCI error event which will be delivered - * to all listeners on the eeh_notifier_chain. - * - * This routine can be called within an interrupt context; - * the actual event will be delivered in a normal context - * (from a workqueue). - */ -int eeh_send_failure_event (struct device_node *dn, - struct pci_dev *dev); - -/* Main recovery function */ -struct pci_dn * handle_eeh_events (struct eeh_event *); +int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev); +struct pci_dn *handle_eeh_events(struct eeh_event *); #endif /* __KERNEL__ */ #endif /* ASM_POWERPC_EEH_EVENT_H */ diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index fc5ae767989..850c00c166d 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -1,5 +1,4 @@ /* - * eeh_cache.c * PCI address cache; allows the lookup of PCI devices based on I/O address * * Copyright IBM Corporation 2004 @@ -47,8 +46,7 @@ * than any hash algo I could think of for this problem, even * with the penalty of slow pointer chases for d-cache misses). 
*/ -struct pci_io_addr_range -{ +struct pci_io_addr_range { struct rb_node rb_node; unsigned long addr_lo; unsigned long addr_hi; @@ -56,8 +54,7 @@ struct pci_io_addr_range unsigned int flags; }; -static struct pci_io_addr_cache -{ +static struct pci_io_addr_cache { struct rb_root rb_root; spinlock_t piar_lock; } pci_io_addr_cache_root; @@ -166,7 +163,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, #ifdef DEBUG printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name (dev)); + alo, ahi, pci_name(dev)); #endif rb_link_node(&piar->rb_node, parent, p); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 61450e1b3f7..3f25fab741e 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -33,8 +33,14 @@ #include #include - -static inline const char * pcid_name (struct pci_dev *pdev) +/** + * eeh_pcid_name - Retrieve name of PCI device driver + * @pdev: PCI device + * + * This routine is used to retrieve the name of PCI device driver + * if that's valid. + */ +static inline const char *pcid_name(struct pci_dev *pdev) { if (pdev && pdev->dev.driver) return pdev->dev.driver->name; @@ -64,7 +70,14 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent) #endif /** - * eeh_disable_irq - disable interrupt for the recovering device + * eeh_disable_irq - Disable interrupt for the recovering device + * @dev: PCI device + * + * This routine must be called when reporting temporary or permanent + * error to the particular PCI device to disable interrupt of that + * device. If the device has enabled MSI or MSI-X interrupt, we needn't + * do real work because EEH should freeze DMA transfers for those PCI + * devices encountering EEH errors, which includes MSI or MSI-X. */ static void eeh_disable_irq(struct pci_dev *dev) { @@ -73,7 +86,7 @@ static void eeh_disable_irq(struct pci_dev *dev) /* Don't disable MSI and MSI-X interrupts. They are * effectively disabled by the DMA Stopped state * when an EEH error occurs. - */ + */ if (dev->msi_enabled || dev->msix_enabled) return; @@ -85,7 +98,11 @@ static void eeh_disable_irq(struct pci_dev *dev) } /** - * eeh_enable_irq - enable interrupt for the recovering device + * eeh_enable_irq - Enable interrupt for the recovering device + * @dev: PCI device + * + * This routine must be called to enable interrupt while failed + * device could be resumed. */ static void eeh_enable_irq(struct pci_dev *dev) { @@ -97,15 +114,15 @@ static void eeh_enable_irq(struct pci_dev *dev) } } -/* ------------------------------------------------------- */ /** - * eeh_report_error - report pci error to each device driver + * eeh_report_error - Report pci error to each device driver + * @dev: PCI device + * @userdata: return value * * Report an EEH error to each device driver, collect up and * merge the device driver responses. Cumulative response * passed back in "userdata". 
*/ - static int eeh_report_error(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; @@ -122,7 +139,7 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) !driver->err_handler->error_detected) return 0; - rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); + rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); /* A driver that needs a reset trumps all others */ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; @@ -132,13 +149,14 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) } /** - * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled + * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled + * @dev: PCI device + * @userdata: return value * * Tells each device driver that IO ports, MMIO and config space I/O * are now enabled. Collects up and merges the device driver responses. * Cumulative response passed back in "userdata". */ - static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; @@ -149,7 +167,7 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) !driver->err_handler->mmio_enabled) return 0; - rc = driver->err_handler->mmio_enabled (dev); + rc = driver->err_handler->mmio_enabled(dev); /* A driver that needs a reset trumps all others */ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; @@ -159,9 +177,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) } /** - * eeh_report_reset - tell device that slot has been reset + * eeh_report_reset - Tell device that slot has been reset + * @dev: PCI device + * @userdata: return value + * + * This routine must be called while EEH tries to reset particular + * PCI device so that the associated PCI device driver could take + * some actions, usually to save data the driver needs so that the + * driver can work again while the device is recovered. */ - static int eeh_report_reset(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; @@ -188,9 +212,14 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) } /** - * eeh_report_resume - tell device to resume normal operations + * eeh_report_resume - Tell device to resume normal operations + * @dev: PCI device + * @userdata: return value + * + * This routine must be called to notify the device driver that it + * could resume so that the device driver can do some initialization + * to make the recovered device work again. */ - static int eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; @@ -212,12 +241,13 @@ static int eeh_report_resume(struct pci_dev *dev, void *userdata) } /** - * eeh_report_failure - tell device driver that device is dead. + * eeh_report_failure - Tell device driver that device is dead. + * @dev: PCI device + * @userdata: return value * * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. */ - static int eeh_report_failure(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; @@ -238,37 +268,16 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata) return 0; } -/* ------------------------------------------------------- */ /** - * handle_eeh_events -- reset a PCI device after hard lockup. 
- * - * pSeries systems will isolate a PCI slot if the PCI-Host - * bridge detects address or data parity errors, DMA's - * occurring to wild addresses (which usually happen due to - * bugs in device drivers or in PCI adapter firmware). - * Slot isolations also occur if #SERR, #PERR or other misc - * PCI-related errors are detected. + * eeh_reset_device - Perform actual reset of a pci slot + * @pe_dn: PE associated device node + * @bus: PCI bus corresponding to the isolcated slot * - * Recovery process consists of unplugging the device driver - * (which generated hotplug events to userspace), then issuing - * a PCI #RST to the device, then reconfiguring the PCI config - * space for all bridges & devices under this slot, and then - * finally restarting the device drivers (which cause a second - * set of hotplug events to go out to userspace). + * This routine must be called to do reset on the indicated PE. + * During the reset, udev might be invoked because those affected + * PCI devices will be removed and then added. */ - -/** - * eeh_reset_device() -- perform actual reset of a pci slot - * @bus: pointer to the pci bus structure corresponding - * to the isolated slot. A non-null value will - * cause all devices under the bus to be removed - * and then re-added. - * @pe_dn: pointer to a "Partionable Endpoint" device node. - * This is the top-level structure on which pci - * bus resets can be performed. - */ - -static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) +static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) { struct device_node *dn; int cnt, rc; @@ -281,12 +290,13 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system - * up if the reset failed for some reason. */ + * up if the reset failed for some reason. + */ rc = eeh_reset_pe(pe_dn); if (rc) return rc; - /* Walk over all functions on this device. */ + /* Walk over all functions on this device. */ dn = pe_dn->node; if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) dn = dn->parent->child; @@ -308,7 +318,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) * potentially weird things happen. */ if (bus) { - ssleep (5); + ssleep(5); pcibios_add_pci_devices(bus); } pe_dn->eeh_freeze_count = cnt; @@ -321,7 +331,24 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) */ #define MAX_WAIT_FOR_RECOVERY 150 -struct pci_dn * handle_eeh_events (struct eeh_event *event) +/** + * eeh_handle_event - Reset a PCI device after hard lockup. + * @event: EEH event + * + * While PHB detects address or data parity errors on particular PCI + * slot, the associated PE will be frozen. Besides, DMA's occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, + * #PERR or other misc PCI-related errors also can trigger EEH errors. + * + * Recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace). 
+ */ +struct pci_dn *handle_eeh_events(struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; @@ -350,9 +377,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) * which was always an EADS pci bridge. In the new style, * there might not be any EADS bridges, and even when there are, * the firmware marks them as "EEH incapable". So another - * two-step is needed to find the pci bus.. */ + * two-step is needed to find the pci bus.. + */ if (!frozen_bus) - frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); + frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); if (!frozen_bus) { printk(KERN_ERR "EEH: Cannot find PCI bus " @@ -395,7 +423,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) pci_walk_bus(frozen_bus, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, - * sometimes over 3 seconds for certain systems. */ + * sometimes over 3 seconds for certain systems. + */ rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); @@ -508,4 +537,3 @@ perm_error: return NULL; } -/* ---------- end of file ---------- */ diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index d2383cfb6df..e98347cb9cc 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -1,6 +1,4 @@ /* - * eeh_event.c - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -46,7 +44,7 @@ DECLARE_WORK(eeh_event_wq, eeh_thread_launcher); DEFINE_MUTEX(eeh_event_mutex); /** - * eeh_event_handler - dispatch EEH events. + * eeh_event_handler - Dispatch EEH events. * @dummy - unused * * The detection of a frozen slot can occur inside an interrupt, @@ -61,7 +59,7 @@ static int eeh_event_handler(void * dummy) struct eeh_event *event; struct pci_dn *pdn; - daemonize ("eehd"); + daemonize("eehd"); set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&eeh_eventlist_lock, flags); @@ -93,7 +91,7 @@ static int eeh_event_handler(void * dummy) /* If there are no new errors after an hour, clear the counter. */ if (pdn && pdn->eeh_freeze_count>0) { - msleep_interruptible (3600*1000); + msleep_interruptible(3600*1000); if (pdn->eeh_freeze_count>0) pdn->eeh_freeze_count--; } @@ -102,8 +100,11 @@ static int eeh_event_handler(void * dummy) } /** - * eeh_thread_launcher + * eeh_thread_launcher - Start kernel thread to handle EEH events * @dummy - unused + * + * This routine is called to start the kernel thread for processing + * EEH event. */ static void eeh_thread_launcher(struct work_struct *dummy) { @@ -112,14 +113,14 @@ static void eeh_thread_launcher(struct work_struct *dummy) } /** - * eeh_send_failure_event - generate a PCI error event - * @dev pci device + * eeh_send_failure_event - Generate a PCI error event + * @dev: pci device * * This routine can be called within an interrupt context; * the actual event will be delivered in a normal context * (from a workqueue). 
*/ -int eeh_send_failure_event (struct device_node *dn, +int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev) { unsigned long flags; @@ -135,7 +136,7 @@ int eeh_send_failure_event (struct device_node *dn, } event = kmalloc(sizeof(*event), GFP_ATOMIC); if (event == NULL) { - printk (KERN_ERR "EEH: out of memory, event not handled\n"); + printk(KERN_ERR "EEH: out of memory, event not handled\n"); return 1; } @@ -154,5 +155,3 @@ int eeh_send_failure_event (struct device_node *dn, return 0; } - -/********************** END OF FILE ******************************/ diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c index eb744ee234d..5e4eab1f444 100644 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c @@ -28,7 +28,7 @@ #include /** - * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic + * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic * @_name: name of file in sysfs directory * @_memb: name of member in struct pci_dn to access * @_format: printf format for display -- cgit v1.2.3-70-g09d2 From def9d83da4f8587dbad5ea57c57d91e53a51006a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:03 +0000 Subject: powerpc/eeh: Cleanup function names in EEH aux components The patch does some cleanup on the function names of EEH aux components. Currently, only couple of function names from eeh_cache have been adjusted so that: * The function name has prefix "eeh_addr_cache". * Move around pci_addr_cache_build() in the header file to reflect function call sequence. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 4 ++-- arch/powerpc/platforms/pseries/eeh.c | 2 +- arch/powerpc/platforms/pseries/eeh_cache.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index b4b18d82b79..c02d5a76cac 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -49,10 +49,10 @@ extern unsigned long pci_probe_only; #ifdef CONFIG_EEH +void pci_addr_cache_build(void); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); -void pci_addr_cache_build(void); -struct pci_dev *pci_get_device_by_addr(unsigned long addr); +struct pci_dev *pci_addr_cache_get_device(unsigned long addr); void eeh_slot_error_detail (struct pci_dn *pdn, int severity); int eeh_pci_enable(struct pci_dn *pdn, int function); int eeh_reset_pe(struct pci_dn *); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index bd4ed83863f..646b520e6be 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -511,7 +511,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon /* Finding the phys addr + pci device; this is pretty quick. 
*/ addr = eeh_token_to_phys((unsigned long __force) token); - dev = pci_get_device_by_addr(addr); + dev = pci_addr_cache_get_device(addr); if (!dev) { no_device++; return val; diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index 850c00c166d..7c36a9cc78c 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -59,7 +59,7 @@ static struct pci_io_addr_cache { spinlock_t piar_lock; } pci_io_addr_cache_root; -static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) +static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) { struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; @@ -83,7 +83,7 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) } /** - * pci_get_device_by_addr - Get device, given only address + * pci_addr_cache_get_device - Get device, given only address * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys address, or a port number, find a pci device @@ -92,13 +92,13 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ -struct pci_dev *pci_get_device_by_addr(unsigned long addr) +struct pci_dev *pci_addr_cache_get_device(unsigned long addr) { struct pci_dev *dev; unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_get_device_by_addr(addr); + dev = __pci_addr_cache_get_device(addr); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return dev; } -- cgit v1.2.3-70-g09d2 From eb740b5f3e6559a8f1c22e2505914d07f9632881 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:04 +0000 Subject: powerpc/eeh: Introduce EEH device Original EEH implementation depends on struct pci_dn heavily. However, EEH shouldn't depend on that actually because EEH needn't share much information with other PCI components. That's to say, EEH should have worked independently. The patch introduces struct eeh_dev so that EEH core components needn't be working based on struct pci_dn in future. Also, struct pci_dn, struct eeh_dev instances are created in dynamic fasion and the binding with EEH device, OF node, PCI device is implemented as well. The EEH devices are created after PHBs are detected and initialized, but PCI emunation hasn't started yet. Apart from that, PHB might be created dynamically through DLPAR component and the EEH devices should be creatd as well. Another case might be OF node is created dynamically by DR (Dynamic Reconfiguration), which has been defined by PAPR. For those OF nodes created by DR, EEH devices should be also created accordingly. The binding between EEH device and OF node is done while the EEH device is initially created. The binding between EEH device and PCI device should be done after PCI emunation is done. Besides, PCI hotplug also needs the binding so that the EEH devices could be traced from the newly coming PCI buses or PCI devices. 
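
As a rough illustration of the bindings this patch sets up (a sketch only, not code from the series; the function name and pr_info() messages are invented), the three objects can be reached from one another through the helpers added below:

#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <asm/eeh.h>

/* Sketch: "dn" is assumed to be a PCI-sensitive OF node for which
 * eeh_dev_init() has already been run by one of the paths described
 * above (PHB init, DR, or PHB hotplug).
 */
static void eeh_binding_sketch(struct device_node *dn)
{
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
	struct pci_dev *pdev;

	if (!edev)
		return;		/* no EEH device was created for this node */

	/* The OF node and the PHB are bound when the EEH device is created */
	pr_info("EEH: edev for %s on PHB %p\n",
		eeh_dev_to_of_node(edev)->full_name, edev->phb);

	/* The PCI device is only bound once PCI enumeration is done
	 * (or when a hot-plugged device shows up), so it may be NULL.
	 */
	pdev = eeh_dev_to_pci_dev(edev);
	if (pdev)
		pr_info("EEH: %s is bound to PCI device %s\n",
			dn->full_name, pci_name(pdev));
}
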
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/device.h | 3 + arch/powerpc/include/asm/eeh.h | 57 ++++++++++++++-- arch/powerpc/kernel/of_platform.c | 6 +- arch/powerpc/kernel/rtas_pci.c | 3 + arch/powerpc/platforms/pseries/Makefile | 3 +- arch/powerpc/platforms/pseries/eeh_dev.c | 102 +++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/pci_dlpar.c | 3 + arch/powerpc/platforms/pseries/setup.c | 6 +- include/linux/of.h | 10 +++ include/linux/pci.h | 7 ++ 10 files changed, 190 insertions(+), 10 deletions(-) create mode 100644 arch/powerpc/platforms/pseries/eeh_dev.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index d57c08acedf..63d5ca49cec 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -31,6 +31,9 @@ struct dev_archdata { #ifdef CONFIG_SWIOTLB dma_addr_t max_direct_dma_addr; #endif +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif }; struct pdev_archdata { diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ad8f31834e0..daaad91ed57 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -31,6 +31,43 @@ struct device_node; #ifdef CONFIG_EEH +/* + * The struct is used to trace EEH state for the associated + * PCI device node or PCI device. In future, it might + * represent PE as well so that the EEH device to form + * another tree except the currently existing tree of PCI + * buses and PCI devices + */ +#define EEH_MODE_SUPPORTED (1<<0) /* EEH supported on the device */ +#define EEH_MODE_NOCHECK (1<<1) /* EEH check should be skipped */ +#define EEH_MODE_ISOLATED (1<<2) /* The device has been isolated */ +#define EEH_MODE_RECOVERING (1<<3) /* Recovering the device */ +#define EEH_MODE_IRQ_DISABLED (1<<4) /* Interrupt disabled */ + +struct eeh_dev { + int mode; /* EEH mode */ + int class_code; /* Class code of the device */ + int config_addr; /* Config address */ + int pe_config_addr; /* PE config address */ + int check_count; /* Times of ignored error */ + int freeze_count; /* Times of froze up */ + int false_positives; /* Times of reported #ff's */ + u32 config_space[16]; /* Saved PCI config space */ + struct pci_controller *phb; /* Associated PHB */ + struct device_node *dn; /* Associated device node */ + struct pci_dev *pdev; /* Associated PCI device */ +}; + +static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) +{ + return edev->dn; +} + +static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) +{ + return edev->pdev; +} + /* * The struct is used to trace the registered EEH operation * callback functions. Actually, those operation callback @@ -70,19 +107,15 @@ struct eeh_ops { extern struct eeh_ops *eeh_ops; extern int eeh_subsystem_enabled; -/* Values for eeh_mode bits in device_node */ -#define EEH_MODE_SUPPORTED (1<<0) -#define EEH_MODE_NOCHECK (1<<1) -#define EEH_MODE_ISOLATED (1<<2) -#define EEH_MODE_RECOVERING (1<<3) -#define EEH_MODE_IRQ_DISABLED (1<<4) - /* * Max number of EEH freezes allowed before we consider the device * to be permanently disabled. 
*/ #define EEH_MAX_ALLOWED_FREEZES 5 +void * __devinit eeh_dev_init(struct device_node *dn, void *data); +void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); +void __init eeh_dev_phb_init(void); void __init eeh_init(void); #ifdef CONFIG_PPC_PSERIES int __init eeh_pseries_init(void); @@ -113,6 +146,16 @@ void eeh_remove_bus_device(struct pci_dev *); #define EEH_IO_ERROR_VALUE(size) (~0U >> ((4 - (size)) * 8)) #else /* !CONFIG_EEH */ + +static inline void *eeh_dev_init(struct device_node *dn, void *data) +{ + return NULL; +} + +static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } + +static inline void eeh_dev_phb_init(void) { } + static inline void eeh_init(void) { } #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index e1612dfb4a9..2049f2d00ff 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -21,12 +21,13 @@ #include #include #include +#include #include #include #include #include -#include +#include #ifdef CONFIG_PPC_OF_PLATFORM_PCI @@ -66,6 +67,9 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); + /* Create EEH devices for the PHB */ + eeh_dev_phb_init_dynamic(phb); + /* Register devices with EEH */ #ifdef CONFIG_EEH if (dev->dev.of_node->child) diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 6cd8f0196b6..517bd86bc3f 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -275,6 +275,9 @@ void __init find_and_init_phbs(void) of_node_put(root); pci_devs_phb_init(); + /* Create EEH devices for all PHBs */ + eeh_dev_phb_init(); + /* * pci_probe_only and pci_assign_all_buses can be set via properties * in chosen. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index ee873cf4fe4..c222189f5bb 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o eeh_pseries.o +obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \ + eeh_event.o eeh_sysfs.o eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c new file mode 100644 index 00000000000..f3aed7dcae9 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -0,0 +1,102 @@ +/* + * The file intends to implement dynamic creation of EEH device, which will + * be bound with OF node and PCI device simutaneously. The EEH devices would + * be foundamental information for EEH core components to work proerly. Besides, + * We have to support multiple situations where dynamic creation of EEH device + * is required: + * + * 1) Before PCI emunation starts, we need create EEH devices according to the + * PCI sensitive OF nodes. + * 2) When PCI emunation is done, we need do the binding between PCI device and + * the associated EEH device. + * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device + * will be created while PCI sensitive OF node is detected from DR. + * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. 
If + * PHB is newly inserted, we also need create EEH devices accordingly. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * eeh_dev_init - Create EEH device according to OF node + * @dn: device node + * @data: PHB + * + * It will create EEH device according to the given OF node. The function + * might be called by PCI emunation, DR, PHB hotplug. + */ +void * __devinit eeh_dev_init(struct device_node *dn, void *data) +{ + struct pci_controller *phb = data; + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL); + if (!edev) { + pr_warning("%s: out of memory\n", __func__); + return NULL; + } + + /* Associate EEH device with OF node */ + dn->edev = edev; + edev->dn = dn; + edev->phb = phb; + + return NULL; +} + +/** + * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB + * @phb: PHB + * + * Scan the PHB OF node and its child association, then create the + * EEH devices accordingly + */ +void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) +{ + struct device_node *dn = phb->dn; + + /* EEH device for PHB */ + eeh_dev_init(dn, phb); + + /* EEH devices for children OF nodes */ + traverse_pci_devices(dn, eeh_dev_init, phb); +} + +/** + * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs + * + * Scan all the existing PHBs and create EEH devices for their OF + * nodes and their children OF nodes + */ +void __init eeh_dev_phb_init(void) +{ + struct pci_controller *phb, *tmp; + + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + eeh_dev_phb_init_dynamic(phb); +} diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 55d4ec1bd1a..fbb21fc3080 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -147,6 +147,9 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); + /* Create EEH devices for the PHB */ + eeh_dev_phb_init_dynamic(phb); + if (dn->child) eeh_add_device_tree_early(dn); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 62b827626ca..8f137af616a 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -260,8 +260,12 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act switch (action) { case PSERIES_RECONFIG_ADD: pci = np->parent->data; - if (pci) + if (pci) { update_dn_pci_info(np, pci->phb); + + /* Create EEH device for the OF node */ + eeh_dev_init(np, pci->phb); + } break; default: err = NOTIFY_DONE; diff --git a/include/linux/of.h 
b/include/linux/of.h index a75a831e205..3e710d87808 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -58,6 +58,9 @@ struct device_node { struct kref kref; unsigned long _flags; void *data; +#if defined(CONFIG_EEH) + struct eeh_dev *edev; +#endif #if defined(CONFIG_SPARC) char *path_component_name; unsigned int unique_id; @@ -72,6 +75,13 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; +#if defined(CONFIG_EEH) +static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) +{ + return dn->edev; +} +#endif + #if defined(CONFIG_SPARC) || !defined(CONFIG_OF) /* Dummy ref counting routines - to be implemented later */ static inline struct device_node *of_node_get(struct device_node *node) diff --git a/include/linux/pci.h b/include/linux/pci.h index a16b1df3def..cfeee2a6a5a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1647,6 +1647,13 @@ static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } #endif /* CONFIG_OF */ +#ifdef CONFIG_EEH +static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) +{ + return pdev->dev.archdata.edev; +} +#endif + /** * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device * @pdev: the PCI device -- cgit v1.2.3-70-g09d2 From f631acd3e98c31b21937682d2b3f39bf9333a18e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:07 +0000 Subject: powerpc/eeh: Replace pci_dn with eeh_dev for EEH core The original EEH implementation is heavily depending on struct pci_dn. We have to put EEH related information to pci_dn. Actually, we could split struct pci_dn so that the EEH sensitive information to form an individual struct, then EEH looks more independent. The patch replaces pci_dn with eeh_dev for EEH core. 
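
The shape of the conversion is mechanical and is easiest to see as a small sketch (hypothetical caller, not a hunk from this diff): state that used to be read through PCI_DN(dn) is now reached through the EEH device.

#include <linux/of.h>
#include <asm/eeh.h>

/* Sketch of the access pattern after this patch; "dn" is assumed to
 * be a device node that already has an eeh_dev attached.
 *
 * Before the series the same check read PCI_DN(dn)->eeh_mode and
 * bumped PCI_DN(dn)->eeh_check_count directly.
 */
static void eeh_isolation_check_sketch(struct device_node *dn)
{
	struct eeh_dev *edev = of_node_to_eeh_dev(dn);

	if (edev && (edev->mode & EEH_MODE_ISOLATED))
		edev->check_count++;
}
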
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 8 +- arch/powerpc/platforms/pseries/eeh.c | 269 ++++++++++++++++++----------------- 2 files changed, 144 insertions(+), 133 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index c02d5a76cac..e660b37aa7d 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,10 +53,10 @@ void pci_addr_cache_build(void); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); struct pci_dev *pci_addr_cache_get_device(unsigned long addr); -void eeh_slot_error_detail (struct pci_dn *pdn, int severity); -int eeh_pci_enable(struct pci_dn *pdn, int function); -int eeh_reset_pe(struct pci_dn *); -void eeh_restore_bars(struct pci_dn *); +void eeh_slot_error_detail(struct eeh_dev *edev, int severity); +int eeh_pci_enable(struct eeh_dev *edev, int function); +int eeh_reset_pe(struct eeh_dev *); +void eeh_restore_bars(struct eeh_dev *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_mark_slot(struct device_node *dn, int mode_flag); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 646b520e6be..aec10f66f5f 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -115,28 +115,29 @@ static unsigned long slot_resets; /** * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @pdn: device to report data for + * @edev: device to report data for * @buf: point to buffer in which to log * @len: amount of room in buffer * * This routine captures assorted PCI configuration space data, * and puts them into a buffer for RTAS error logging. 
*/ -static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) +static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) { - struct pci_dev *dev = pdn->pcidev; + struct device_node *dn = eeh_dev_to_of_node(edev); + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; int n = 0; - n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name); + n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); + printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); - rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + rtas_read_config(PCI_DN(dn), PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); - rtas_read_config(pdn, PCI_COMMAND, 4, &cfg); + rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); @@ -147,11 +148,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) /* Gather bridge-specific registers */ if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + rtas_read_config(PCI_DN(dn), PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); - rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + rtas_read_config(PCI_DN(dn), PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); } @@ -159,11 +160,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (cap) { - rtas_read_config(pdn, cap, 4, &cfg); + rtas_read_config(PCI_DN(dn), cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); - rtas_read_config(pdn, cap+4, 4, &cfg); + rtas_read_config(PCI_DN(dn), cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); } @@ -176,7 +177,7 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) "EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - rtas_read_config(pdn, cap+4*i, 4, &cfg); + rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); } @@ -188,7 +189,7 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) "EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<14; i++) { - rtas_read_config(pdn, cap+4*i, 4, &cfg); + rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); } @@ -197,12 +198,11 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) /* Gather status on devices under the bridge */ if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - struct device_node *dn; + struct device_node *child; - for_each_child_of_node(pdn->node, dn) { - pdn = PCI_DN(dn); - if (pdn) - n += eeh_gather_pci_data(pdn, buf+n, len-n); + for_each_child_of_node(dn, child) { + if (of_node_to_eeh_dev(child)) + n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n); } } @@ -211,7 +211,7 @@ static 
size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) /** * eeh_slot_error_detail - Generate combined log including driver log and error log - * @pdn: device node + * @edev: device to report error log for * @severity: temporary or permanent error log * * This routine should be called to generate the combined log, which @@ -219,17 +219,17 @@ static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) * out from the config space of the corresponding PCI device, while * the error log is fetched through platform dependent function call. */ -void eeh_slot_error_detail(struct pci_dn *pdn, int severity) +void eeh_slot_error_detail(struct eeh_dev *edev, int severity) { size_t loglen = 0; pci_regs_buf[0] = 0; - eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO); - eeh_ops->configure_bridge(pdn->node); - eeh_restore_bars(pdn); - loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); + eeh_pci_enable(edev, EEH_OPT_THAW_MMIO); + eeh_ops->configure_bridge(eeh_dev_to_of_node(edev)); + eeh_restore_bars(edev); + loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); - eeh_ops->get_log(pdn->node, severity, pci_regs_buf, loglen); + eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen); } /** @@ -260,8 +260,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) */ struct device_node *eeh_find_device_pe(struct device_node *dn) { - while ((dn->parent) && PCI_DN(dn->parent) && - (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { + while (dn->parent && of_node_to_eeh_dev(dn->parent) && + (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { dn = dn->parent; } return dn; @@ -284,11 +284,11 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag) struct device_node *dn; for_each_child_of_node(parent, dn) { - if (PCI_DN(dn)) { + if (of_node_to_eeh_dev(dn)) { /* Mark the pci device driver too */ - struct pci_dev *dev = PCI_DN(dn)->pcidev; + struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; - PCI_DN(dn)->eeh_mode |= mode_flag; + of_node_to_eeh_dev(dn)->mode |= mode_flag; if (dev && dev->driver) dev->error_state = pci_channel_io_frozen; @@ -312,13 +312,13 @@ void eeh_mark_slot(struct device_node *dn, int mode_flag) dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent; - PCI_DN(dn)->eeh_mode |= mode_flag; + of_node_to_eeh_dev(dn)->mode |= mode_flag; /* Mark the pci device too */ - dev = PCI_DN(dn)->pcidev; + dev = of_node_to_eeh_dev(dn)->pdev; if (dev) dev->error_state = pci_channel_io_frozen; @@ -337,9 +337,9 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) struct device_node *dn; for_each_child_of_node(parent, dn) { - if (PCI_DN(dn)) { - PCI_DN(dn)->eeh_mode &= ~mode_flag; - PCI_DN(dn)->eeh_check_count = 0; + if (of_node_to_eeh_dev(dn)) { + of_node_to_eeh_dev(dn)->mode &= ~mode_flag; + of_node_to_eeh_dev(dn)->check_count = 0; __eeh_clear_slot(dn, mode_flag); } } @@ -360,11 +360,11 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag) dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent; - PCI_DN(dn)->eeh_mode &= ~mode_flag; - PCI_DN(dn)->eeh_check_count = 0; + of_node_to_eeh_dev(dn)->mode &= ~mode_flag; + 
of_node_to_eeh_dev(dn)->check_count = 0; __eeh_clear_slot(dn, mode_flag); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } @@ -388,7 +388,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) { int ret; unsigned long flags; - struct pci_dn *pdn; + struct eeh_dev *edev; int rc = 0; const char *location; @@ -402,18 +402,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) return 0; } dn = eeh_find_device_pe(dn); - pdn = PCI_DN(dn); + edev = of_node_to_eeh_dev(dn); /* Access to IO BARs might get this far and still not want checking. */ - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { + if (!(edev->mode & EEH_MODE_SUPPORTED) || + edev->mode & EEH_MODE_NOCHECK) { ignored_check++; pr_debug("EEH: Ignored check (%x) for %s %s\n", - pdn->eeh_mode, eeh_pci_name(dev), dn->full_name); + edev->mode, eeh_pci_name(dev), dn->full_name); return 0; } - if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) { + if (!edev->config_addr && !edev->pe_config_addr) { no_cfg_addr++; return 0; } @@ -426,13 +426,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) */ raw_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; - if (pdn->eeh_mode & EEH_MODE_ISOLATED) { - pdn->eeh_check_count ++; - if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) { + if (edev->mode & EEH_MODE_ISOLATED) { + edev->check_count++; + if (edev->check_count % EEH_MAX_FAILS == 0) { location = of_get_property(dn, "ibm,loc-code", NULL); printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", - pdn->eeh_check_count, location, + edev->check_count, location, eeh_driver_name(dev), eeh_pci_name(dev)); printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); @@ -448,7 +448,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * function zero of a multi-function device. * In any case they must share a common PHB. */ - ret = eeh_ops->get_state(pdn->node, NULL); + ret = eeh_ops->get_state(dn, NULL); /* Note that config-io to empty slots may fail; * they are empty when they don't have children. @@ -461,7 +461,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { false_positives++; - pdn->eeh_false_positives ++; + edev->false_positives ++; rc = 0; goto dn_unlock; } @@ -475,7 +475,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) eeh_mark_slot(dn, EEH_MODE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event(dn, dev); + eeh_send_failure_event(edev->dn, edev->pdev); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure @@ -529,22 +529,23 @@ EXPORT_SYMBOL(eeh_check_failure); /** * eeh_pci_enable - Enable MMIO or DMA transfers for this slot - * @pdn pci device node + * @edev: pci device node * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing * recovery or log collection on the indicated device. 
*/ -int eeh_pci_enable(struct pci_dn *pdn, int function) +int eeh_pci_enable(struct eeh_dev *edev, int function) { int rc; + struct device_node *dn = eeh_dev_to_of_node(edev); - rc = eeh_ops->set_option(pdn->node, function); + rc = eeh_ops->set_option(dn, function); if (rc) printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", - function, rc, pdn->node->full_name); + function, rc, dn->full_name); - rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && (function == EEH_OPT_THAW_MMIO)) return 0; @@ -595,8 +596,8 @@ void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) struct device_node *dn; for_each_child_of_node(parent, dn) { - if (PCI_DN(dn)) { - struct pci_dev *dev = PCI_DN(dn)->pcidev; + if (of_node_to_eeh_dev(dn)) { + struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; if (dev && dev->driver) *freset |= dev->needs_freset; @@ -622,10 +623,10 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) dn = eeh_find_device_pe(dn); /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent; - dev = PCI_DN(dn)->pcidev; + dev = of_node_to_eeh_dev(dn)->pdev; if (dev) *freset |= dev->needs_freset; @@ -634,13 +635,14 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) /** * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second - * @pdn: pci device node to be reset. + * @edev: pci device node to be reset. * * Assert the PCI #RST line for 1/4 second. */ -static void eeh_reset_pe_once(struct pci_dn *pdn) +static void eeh_reset_pe_once(struct eeh_dev *edev) { unsigned int freset = 0; + struct device_node *dn = eeh_dev_to_of_node(edev); /* Determine type of EEH reset required for * Partitionable Endpoint, a hot-reset (1) @@ -648,12 +650,12 @@ static void eeh_reset_pe_once(struct pci_dn *pdn) * A fundamental reset required by any device under * Partitionable Endpoint trumps hot-reset. */ - eeh_set_pe_freset(pdn->node, &freset); + eeh_set_pe_freset(dn, &freset); if (freset) - eeh_ops->reset(pdn->node, EEH_RESET_FUNDAMENTAL); + eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); else - eeh_ops->reset(pdn->node, EEH_RESET_HOT); + eeh_ops->reset(dn, EEH_RESET_HOT); /* The PCI bus requires that the reset be held high for at least * a 100 milliseconds. We wait a bit longer 'just in case'. @@ -665,9 +667,9 @@ static void eeh_reset_pe_once(struct pci_dn *pdn) * pci slot reset line is dropped. Make sure we don't miss * these, and clear the flag now. */ - eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); + eeh_clear_slot(dn, EEH_MODE_ISOLATED); - eeh_ops->reset(pdn->node, EEH_RESET_DEACTIVATE); + eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting @@ -679,31 +681,32 @@ static void eeh_reset_pe_once(struct pci_dn *pdn) /** * eeh_reset_pe - Reset the indicated PE - * @pdn: PCI device node + * @edev: PCI device associated EEH device * * This routine should be called to reset indicated device, including * PE. A PE might include multiple PCI devices and sometimes PCI bridges * might be involved as well. 
*/ -int eeh_reset_pe(struct pci_dn *pdn) +int eeh_reset_pe(struct eeh_dev *edev) { int i, rc; + struct device_node *dn = eeh_dev_to_of_node(edev); /* Take three shots at resetting the bus */ for (i=0; i<3; i++) { - eeh_reset_pe_once(pdn); + eeh_reset_pe_once(edev); - rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) return 0; if (rc < 0) { printk(KERN_ERR "EEH: unrecoverable slot failure %s\n", - pdn->node->full_name); + dn->full_name); return -1; } printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n", - i+1, pdn->node->full_name, rc); + i+1, dn->full_name, rc); } return -1; @@ -719,90 +722,95 @@ int eeh_reset_pe(struct pci_dn *pdn) /** * eeh_restore_one_device_bars - Restore the Base Address Registers for one device - * @pdn: pci device node + * @edev: PCI device associated EEH device * * Loads the PCI configuration space base address registers, * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. */ -static inline void eeh_restore_one_device_bars(struct pci_dn *pdn) +static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) { int i; u32 cmd; + struct device_node *dn = eeh_dev_to_of_node(edev); + + if (!edev->phb) + return; - if (NULL==pdn->phb) return; for (i=4; i<10; i++) { - rtas_write_config(pdn, i*4, 4, pdn->config_space[i]); + rtas_write_config(PCI_DN(dn), i*4, 4, edev->config_space[i]); } /* 12 == Expansion ROM Address */ - rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]); + rtas_write_config(PCI_DN(dn), 12*4, 4, edev->config_space[12]); #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) -#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) +#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + rtas_write_config(PCI_DN(dn), PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - rtas_write_config(pdn, PCI_LATENCY_TIMER, 1, + rtas_write_config(PCI_DN(dn), PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); + rtas_write_config(PCI_DN(dn), 15*4, 4, edev->config_space[15]); /* Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - rtas_read_config(pdn, PCI_COMMAND, 4, &cmd); - if (pdn->config_space[1] & PCI_COMMAND_PARITY) + rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cmd); + if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else cmd &= ~PCI_COMMAND_PARITY; - if (pdn->config_space[1] & PCI_COMMAND_SERR) + if (edev->config_space[1] & PCI_COMMAND_SERR) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - rtas_write_config(pdn, PCI_COMMAND, 4, cmd); + rtas_write_config(PCI_DN(dn), PCI_COMMAND, 4, cmd); } /** * eeh_restore_bars - Restore the PCI config space info - * @pdn: PCI device node + * @edev: EEH device * * This routine performs a recursive walk to the children * of this device as well. 
*/ -void eeh_restore_bars(struct pci_dn *pdn) +void eeh_restore_bars(struct eeh_dev *edev) { struct device_node *dn; - if (!pdn) + if (!edev) return; - if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) - eeh_restore_one_device_bars(pdn); + if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code)) + eeh_restore_one_device_bars(edev); - for_each_child_of_node(pdn->node, dn) - eeh_restore_bars(PCI_DN(dn)); + for_each_child_of_node(eeh_dev_to_of_node(edev), dn) + eeh_restore_bars(of_node_to_eeh_dev(dn)); } /** * eeh_save_bars - Save device bars - * @pdn: PCI device node + * @edev: PCI device associated EEH device * * Save the values of the device bars. Unlike the restore * routine, this routine is *not* recursive. This is because * PCI devices are added individually; but, for the restore, * an entire slot is reset at a time. */ -static void eeh_save_bars(struct pci_dn *pdn) +static void eeh_save_bars(struct eeh_dev *edev) { int i; + struct device_node *dn; - if (!pdn ) + if (!edev) return; + dn = eeh_dev_to_of_node(edev); for (i = 0; i < 16; i++) - rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); + rtas_read_config(PCI_DN(dn), i * 4, 4, &edev->config_space[i]); } /** @@ -822,13 +830,13 @@ static void *eeh_early_enable(struct device_node *dn, void *data) const u32 *device_id = of_get_property(dn, "device-id", NULL); const u32 *regs; int enable; - struct pci_dn *pdn = PCI_DN(dn); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); - pdn->class_code = 0; - pdn->eeh_mode = 0; - pdn->eeh_check_count = 0; - pdn->eeh_freeze_count = 0; - pdn->eeh_false_positives = 0; + edev->class_code = 0; + edev->mode = 0; + edev->check_count = 0; + edev->freeze_count = 0; + edev->false_positives = 0; if (!of_device_is_available(dn)) return NULL; @@ -839,10 +847,10 @@ static void *eeh_early_enable(struct device_node *dn, void *data) /* There is nothing to check on PCI to ISA bridges */ if (dn->type && !strcmp(dn->type, "isa")) { - pdn->eeh_mode |= EEH_MODE_NOCHECK; + edev->mode |= EEH_MODE_NOCHECK; return NULL; } - pdn->class_code = *class_code; + edev->class_code = *class_code; /* Ok... see if this device supports EEH. Some do, some don't, * and the only way to find out is to check each and every one. @@ -855,40 +863,40 @@ static void *eeh_early_enable(struct device_node *dn, void *data) enable = 0; if (ret == 0) { - pdn->eeh_config_addr = regs[0]; + edev->config_addr = regs[0]; /* If the newer, better, ibm,get-config-addr-info is supported, * then use that instead. */ - pdn->eeh_pe_config_addr = eeh_ops->get_pe_addr(dn); + edev->pe_config_addr = eeh_ops->get_pe_addr(dn); /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes * where EEH is not supported. Verify support * explicitly. */ - ret = eeh_ops->get_state(pdn->node, NULL); + ret = eeh_ops->get_state(dn, NULL); if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; } if (enable) { eeh_subsystem_enabled = 1; - pdn->eeh_mode |= EEH_MODE_SUPPORTED; + edev->mode |= EEH_MODE_SUPPORTED; pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", - dn->full_name, pdn->eeh_config_addr, - pdn->eeh_pe_config_addr); + dn->full_name, edev->config_addr, + edev->pe_config_addr); } else { /* This device doesn't support EEH, but it may have an * EEH parent, in which case we mark it as supported. 
*/ - if (dn->parent && PCI_DN(dn->parent) - && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { + if (dn->parent && of_node_to_eeh_dev(dn->parent) && + (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { /* Parent supports EEH. */ - pdn->eeh_mode |= EEH_MODE_SUPPORTED; - pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; + edev->mode |= EEH_MODE_SUPPORTED; + edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; return NULL; } } @@ -897,7 +905,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data) dn->full_name); } - eeh_save_bars(pdn); + eeh_save_bars(edev); return NULL; } @@ -994,7 +1002,7 @@ void __init eeh_init(void) unsigned long buid; buid = get_phb_buid(phb); - if (buid == 0 || PCI_DN(phb) == NULL) + if (buid == 0 || !of_node_to_eeh_dev(phb)) continue; traverse_pci_devices(phb, eeh_early_enable, NULL); @@ -1022,9 +1030,9 @@ static void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; - if (!dn || !PCI_DN(dn)) + if (!dn || !of_node_to_eeh_dev(dn)) return; - phb = PCI_DN(dn)->phb; + phb = of_node_to_eeh_dev(dn)->phb; /* USB Bus children of PCI devices will not have BUID's */ if (NULL == phb || 0 == phb->buid) @@ -1061,7 +1069,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); static void eeh_add_device_late(struct pci_dev *dev) { struct device_node *dn; - struct pci_dn *pdn; + struct eeh_dev *edev; if (!dev || !eeh_subsystem_enabled) return; @@ -1069,15 +1077,16 @@ static void eeh_add_device_late(struct pci_dev *dev) pr_debug("EEH: Adding device %s\n", pci_name(dev)); dn = pci_device_to_OF_node(dev); - pdn = PCI_DN(dn); - if (pdn->pcidev == dev) { + edev = pci_dev_to_eeh_dev(dev); + if (edev->pdev == dev) { pr_debug("EEH: Already referenced !\n"); return; } - WARN_ON(pdn->pcidev); + WARN_ON(edev->pdev); pci_dev_get(dev); - pdn->pcidev = dev; + edev->pdev = dev; + dev->dev.archdata.edev = edev; pci_addr_cache_insert_device(dev); eeh_sysfs_add_device(dev); @@ -1118,19 +1127,21 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); */ static void eeh_remove_device(struct pci_dev *dev) { - struct device_node *dn; + struct eeh_dev *edev; + if (!dev || !eeh_subsystem_enabled) return; + edev = pci_dev_to_eeh_dev(dev); /* Unregister the device with the EEH/PCI address search system */ pr_debug("EEH: Removing device %s\n", pci_name(dev)); - dn = pci_device_to_OF_node(dev); - if (PCI_DN(dn)->pcidev == NULL) { + if (!edev || !edev->pdev) { pr_debug("EEH: Not referenced !\n"); return; } - PCI_DN(dn)->pcidev = NULL; + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; pci_dev_put(dev); pci_addr_cache_remove_device(dev); -- cgit v1.2.3-70-g09d2 From 40a7cd92197bfa947a1dab80c73a42dd33ac5067 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:08 +0000 Subject: powerpc/eeh: Replace pci_dn with eeh_dev for EEH aux components The original EEH implementation is heavily depending on struct pci_dn. We have to put EEH related information to pci_dn. Actually, we could split struct pci_dn so that the EEH sensitive information to form an individual struct, then EEH looks more independent. The patch replaces pci_dn with eeh_dev for EEH aux components like event and driver. Also, the eeh_event struct has been adjusted for a little bit since eeh_dev has linked the associated FDT (Flat Device Tree) node and PCI device. It's not necessary for eeh_event struct to trace FDT node and PCI device. We can just simply to trace eeh_dev in eeh_event. 
The patch also renames function pcid_name() to eeh_pcid_name(), which should be missed in the previous patch where the EEH aux components have been cleaned up. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh_event.h | 7 ++- arch/powerpc/platforms/pseries/eeh.c | 2 +- arch/powerpc/platforms/pseries/eeh_driver.c | 81 ++++++++++++++--------------- arch/powerpc/platforms/pseries/eeh_event.c | 36 +++++++------ 4 files changed, 63 insertions(+), 63 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 25ebf6a5aa5..c68b012b779 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -28,12 +28,11 @@ */ struct eeh_event { struct list_head list; /* to form event queue */ - struct device_node *dn; /* struct device node */ - struct pci_dev *dev; /* affected device */ + struct eeh_dev *edev; /* EEH device */ }; -int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev); -struct pci_dn *handle_eeh_events(struct eeh_event *); +int eeh_send_failure_event(struct eeh_dev *edev); +struct eeh_dev *handle_eeh_events(struct eeh_event *); #endif /* __KERNEL__ */ #endif /* ASM_POWERPC_EEH_EVENT_H */ diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index aec10f66f5f..9b1fd0c0929 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -475,7 +475,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) eeh_mark_slot(dn, EEH_MODE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event(edev->dn, edev->pdev); + eeh_send_failure_event(edev); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 3f25fab741e..baf92cd9dfa 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -40,7 +40,7 @@ * This routine is used to retrieve the name of PCI device driver * if that's valid. */ -static inline const char *pcid_name(struct pci_dev *pdev) +static inline const char *eeh_pcid_name(struct pci_dev *pdev) { if (pdev && pdev->dev.driver) return pdev->dev.driver->name; @@ -81,7 +81,7 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent) */ static void eeh_disable_irq(struct pci_dev *dev) { - struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); /* Don't disable MSI and MSI-X interrupts. 
They are * effectively disabled by the DMA Stopped state @@ -93,7 +93,7 @@ static void eeh_disable_irq(struct pci_dev *dev) if (!irq_has_action(dev->irq)) return; - PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + edev->mode |= EEH_MODE_IRQ_DISABLED; disable_irq_nosync(dev->irq); } @@ -106,10 +106,10 @@ static void eeh_disable_irq(struct pci_dev *dev) */ static void eeh_enable_irq(struct pci_dev *dev) { - struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; + if ((edev->mode) & EEH_MODE_IRQ_DISABLED) { + edev->mode &= ~EEH_MODE_IRQ_DISABLED; enable_irq(dev->irq); } } @@ -270,20 +270,20 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata) /** * eeh_reset_device - Perform actual reset of a pci slot - * @pe_dn: PE associated device node + * @edev: PE associated EEH device * @bus: PCI bus corresponding to the isolcated slot * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) { struct device_node *dn; int cnt, rc; /* pcibios will clear the counter; save the value */ - cnt = pe_dn->eeh_freeze_count; + cnt = edev->freeze_count; if (bus) pcibios_remove_pci_devices(bus); @@ -292,21 +292,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. */ - rc = eeh_reset_pe(pe_dn); + rc = eeh_reset_pe(edev); if (rc) return rc; /* Walk over all functions on this device. */ - dn = pe_dn->node; - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = eeh_dev_to_of_node(edev); + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) dn = dn->parent->child; while (dn) { - struct pci_dn *ppe = PCI_DN(dn); + struct eeh_dev *pedev = of_node_to_eeh_dev(dn); + /* On Power4, always true because eeh_pe_config_addr=0 */ - if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { + if (edev->pe_config_addr == pedev->pe_config_addr) { eeh_ops->configure_bridge(dn); - eeh_restore_bars(ppe); + eeh_restore_bars(pedev); } dn = dn->sibling; } @@ -321,7 +322,7 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) ssleep(5); pcibios_add_pci_devices(bus); } - pe_dn->eeh_freeze_count = cnt; + edev->freeze_count = cnt; return 0; } @@ -348,23 +349,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) * drivers (which cause a second set of hotplug events to go out to * userspace). */ -struct pci_dn *handle_eeh_events(struct eeh_event *event) +struct eeh_dev *handle_eeh_events(struct eeh_event *event) { struct device_node *frozen_dn; - struct pci_dn *frozen_pdn; + struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; - frozen_dn = eeh_find_device_pe(event->dn); + frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); if (!frozen_dn) { - - location = of_get_property(event->dn, "ibm,loc-code", NULL); + location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); location = location ? 
location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", - location, eeh_pci_name(event->dev)); + location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); return NULL; } @@ -389,22 +389,21 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) return NULL; } - frozen_pdn = PCI_DN(frozen_dn); - frozen_pdn->eeh_freeze_count++; + frozen_edev = of_node_to_eeh_dev(frozen_dn); + frozen_edev->freeze_count++; + pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); + drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); - pci_str = eeh_pci_name(event->dev); - drv_str = pcid_name(event->dev); - - if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) + if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", - frozen_pdn->eeh_freeze_count); + frozen_edev->freeze_count); - if (frozen_pdn->pcidev) { - bus_pci_str = pci_name(frozen_pdn->pcidev); - bus_drv_str = pcid_name(frozen_pdn->pcidev); + if (frozen_edev->pdev) { + bus_pci_str = pci_name(frozen_edev->pdev); + bus_drv_str = eeh_pcid_name(frozen_edev->pdev); printk(KERN_WARNING "EEH: Bus location=%s driver=%s pci addr=%s\n", location, bus_drv_str, bus_pci_str); @@ -425,7 +424,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ - rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000); + rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; @@ -435,14 +434,14 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) * don't post the error log until after all dev drivers * have been informed. */ - eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP); + eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. 
*/ if (result == PCI_ERS_RESULT_NONE) { - rc = eeh_reset_device(frozen_pdn, frozen_bus); + rc = eeh_reset_device(frozen_edev, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; @@ -451,7 +450,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_MMIO); + rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; @@ -465,7 +464,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_DMA); + rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; @@ -483,7 +482,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { - rc = eeh_reset_device(frozen_pdn, NULL); + rc = eeh_reset_device(frozen_edev, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; @@ -502,7 +501,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event) /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); - return frozen_pdn; + return frozen_edev; excess_failures: /* @@ -515,7 +514,7 @@ excess_failures: "has failed %d times in the last hour " "and has been permanently disabled.\n" "Please try reseating this device or replacing it.\n", - location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); + location, drv_str, pci_str, frozen_edev->freeze_count); goto perm_error; hard_fail: @@ -526,7 +525,7 @@ hard_fail: location, drv_str, pci_str); perm_error: - eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM); + eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ pci_walk_bus(frozen_bus, eeh_report_failure, NULL); diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index e98347cb9cc..4a475256585 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -56,8 +56,8 @@ DEFINE_MUTEX(eeh_event_mutex); static int eeh_event_handler(void * dummy) { unsigned long flags; - struct eeh_event *event; - struct pci_dn *pdn; + struct eeh_event *event; + struct eeh_dev *edev; daemonize("eehd"); set_current_state(TASK_INTERRUPTIBLE); @@ -77,23 +77,26 @@ static int eeh_event_handler(void * dummy) /* Serialize processing of EEH events */ mutex_lock(&eeh_event_mutex); - eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); + edev = event->edev; + eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - eeh_pci_name(event->dev)); + eeh_pci_name(edev->pdev)); + + edev = handle_eeh_events(event); - pdn = handle_eeh_events(event); + eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); + pci_dev_put(edev->pdev); - eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); - pci_dev_put(event->dev); kfree(event); mutex_unlock(&eeh_event_mutex); /* If there are no new errors after an hour, clear the counter. 
*/ - if (pdn && pdn->eeh_freeze_count>0) { + if (edev && edev->freeze_count>0) { msleep_interruptible(3600*1000); - if (pdn->eeh_freeze_count>0) - pdn->eeh_freeze_count--; + if (edev->freeze_count>0) + edev->freeze_count--; + } return 0; @@ -114,17 +117,17 @@ static void eeh_thread_launcher(struct work_struct *dummy) /** * eeh_send_failure_event - Generate a PCI error event - * @dev: pci device + * @edev: EEH device * * This routine can be called within an interrupt context; * the actual event will be delivered in a normal context * (from a workqueue). */ -int eeh_send_failure_event(struct device_node *dn, - struct pci_dev *dev) +int eeh_send_failure_event(struct eeh_dev *edev) { unsigned long flags; struct eeh_event *event; + struct device_node *dn = eeh_dev_to_of_node(edev); const char *location; if (!mem_init_done) { @@ -140,11 +143,10 @@ int eeh_send_failure_event(struct device_node *dn, return 1; } - if (dev) - pci_dev_get(dev); + if (edev->pdev) + pci_dev_get(edev->pdev); - event->dn = dn; - event->dev = dev; + event->edev = edev; /* We may or may not be called in an interrupt context */ spin_lock_irqsave(&eeh_eventlist_lock, flags); -- cgit v1.2.3-70-g09d2 From 3780444c4fcec28c96ab7002858bb051215a5fc1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 27 Feb 2012 20:04:11 +0000 Subject: powerpc/eeh: pseries platform config space access in EEH With the original EEH implementation, the access to config space of the corresponding PCI device is done by RTAS sensitive function. That depends on pci_dn heavily. That would limit EEH extension to other platforms like powernv because other platforms might have different ways to access PCI config space. The patch splits those functions used to access PCI config space and implement them in platform related EEH component. It would be helpful to support EEH on multiple platforms simutaneously in future. 
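As an illustration of the split described above, a hypothetical non-pseries backend would only need to supply the two new callbacks; the signatures follow the struct eeh_ops additions in the diff below, while the myplat_* names and the low-level helpers are invented for this sketch.

/* Hypothetical platform: raw PCI config accessors for the EEH core */
static int myplat_eeh_read_config(struct device_node *dn, int where,
				  int size, u32 *val)
{
	/* platform-specific access path, e.g. through its own PHB ops */
	return myplat_phb_cfg_read(dn, where, size, val);	/* assumed helper */
}

static int myplat_eeh_write_config(struct device_node *dn, int where,
				   int size, u32 val)
{
	return myplat_phb_cfg_write(dn, where, size, val);	/* assumed helper */
}

static struct eeh_ops myplat_eeh_ops = {
	.name		= "myplat",
	/* other callbacks omitted in this sketch */
	.read_config	= myplat_eeh_read_config,
	.write_config	= myplat_eeh_write_config,
};

Core code such as eeh_save_bars() and eeh_restore_one_device_bars() then goes through eeh_ops->read_config()/write_config() without knowing which backend is registered, which is the platform independence the commit message is after.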
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 2 ++ arch/powerpc/platforms/pseries/eeh.c | 32 +++++++++++----------- arch/powerpc/platforms/pseries/eeh_pseries.c | 40 +++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index daaad91ed57..d60f99814ff 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -102,6 +102,8 @@ struct eeh_ops { int (*wait_state)(struct device_node *dn, int max_wait); int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct device_node *dn); + int (*read_config)(struct device_node *dn, int where, int size, u32 *val); + int (*write_config)(struct device_node *dn, int where, int size, u32 val); }; extern struct eeh_ops *eeh_ops; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 1d08cd76e68..8011088392d 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -141,11 +141,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); - rtas_read_config(PCI_DN(dn), PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); - rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); @@ -156,11 +156,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) /* Gather bridge-specific registers */ if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - rtas_read_config(PCI_DN(dn), PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); - rtas_read_config(PCI_DN(dn), PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); } @@ -168,11 +168,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (cap) { - rtas_read_config(PCI_DN(dn), cap, 4, &cfg); + eeh_ops->read_config(dn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); - rtas_read_config(PCI_DN(dn), cap+4, 4, &cfg); + eeh_ops->read_config(dn, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); } @@ -185,7 +185,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) "EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg); + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); } @@ -197,7 +197,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) "EEH: 
PCI-E AER capability register set follows:\n"); for (i=0; i<14; i++) { - rtas_read_config(PCI_DN(dn), cap+4*i, 4, &cfg); + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); } @@ -746,28 +746,28 @@ static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) return; for (i=4; i<10; i++) { - rtas_write_config(PCI_DN(dn), i*4, 4, edev->config_space[i]); + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); } /* 12 == Expansion ROM Address */ - rtas_write_config(PCI_DN(dn), 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - rtas_write_config(PCI_DN(dn), PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - rtas_write_config(PCI_DN(dn), PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - rtas_write_config(PCI_DN(dn), 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); /* Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - rtas_read_config(PCI_DN(dn), PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else @@ -776,7 +776,7 @@ static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - rtas_write_config(PCI_DN(dn), PCI_COMMAND, 4, cmd); + eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); } /** @@ -818,7 +818,7 @@ static void eeh_save_bars(struct eeh_dev *edev) dn = eeh_dev_to_of_node(edev); for (i = 0; i < 16; i++) - rtas_read_config(PCI_DN(dn), i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); } /** diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 36a1af1d114..8752f79a6af 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -503,6 +503,42 @@ static int pseries_eeh_configure_bridge(struct device_node *dn) return ret; } +/** + * pseries_eeh_read_config - Read PCI config space + * @dn: device node + * @where: PCI address + * @size: size to read + * @val: return value + * + * Read config space from the speicifed device + */ +static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val) +{ + struct pci_dn *pdn; + + pdn = PCI_DN(dn); + + return rtas_read_config(pdn, where, size, val); +} + +/** + * pseries_eeh_write_config - Write PCI config space + * @dn: device node + * @where: PCI address + * @size: size to write + * @val: value to be written + * + * Write config space to the specified device + */ +static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val) +{ + struct pci_dn *pdn; + + pdn = PCI_DN(dn); + + return rtas_write_config(pdn, where, size, val); +} + static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, @@ -512,7 +548,9 @@ static struct eeh_ops pseries_eeh_ops = { .reset = pseries_eeh_reset, .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, - .configure_bridge = pseries_eeh_configure_bridge + .configure_bridge = 
pseries_eeh_configure_bridge, + .read_config = pseries_eeh_read_config, + .write_config = pseries_eeh_write_config }; /** -- cgit v1.2.3-70-g09d2 From 7230c5644188cd9e3fb380cc97dde00c464a3ba7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Mar 2012 18:27:59 +1100 Subject: powerpc: Rework lazy-interrupt handling The current implementation of lazy interrupts handling has some issues that this tries to address. We don't do the various workarounds we need to do when re-enabling interrupts in some cases such as when returning from an interrupt and thus we may still lose or get delayed decrementer or doorbell interrupts. The current scheme also makes it much harder to handle the external "edge" interrupts provided by some BookE processors when using the EPR facility (External Proxy) and the Freescale Hypervisor. Additionally, we tend to keep interrupts hard disabled in a number of cases, such as decrementer interrupts, external interrupts, or when a masked decrementer interrupt is pending. This is sub-optimal. This is an attempt at fixing it all in one go by reworking the way we do the lazy interrupt disabling from the ground up. The base idea is to replace the "hard_enabled" field with a "irq_happened" field in which we store a bit mask of what interrupt occurred while soft-disabled. When re-enabling, either via arch_local_irq_restore() or when returning from an interrupt, we can now decide what to do by testing bits in that field. We then implement replaying of the missed interrupts either by re-using the existing exception frame (in exception exit case) or via the creation of a new one from an assembly trampoline (in the arch_local_irq_enable case). This removes the need to play with the decrementer to try to create fake interrupts, among others. In addition, this adds a few refinements: - We no longer hard disable decrementer interrupts that occur while soft-disabled. We now simply bump the decrementer back to max (on BookS) or leave it stopped (on BookE) and continue with hard interrupts enabled, which means that we'll potentially get better sample quality from performance monitor interrupts. - Timer, decrementer and doorbell interrupts now hard-enable shortly after removing the source of the interrupt, which means they no longer run entirely hard disabled. Again, this will improve perf sample quality. - On Book3E 64-bit, we now make the performance monitor interrupt act as an NMI like Book3S (the necessary C code for that to work appear to already be present in the FSL perf code, notably calling nmi_enter instead of irq_enter). (This also fixes a bug where BookE perfmon interrupts could clobber r14 ... oops) - We could make "masked" decrementer interrupts act as NMIs when doing timer-based perf sampling to improve the sample quality. Signed-off-by-yet: Benjamin Herrenschmidt --- v2: - Add hard-enable to decrementer, timer and doorbells - Fix CR clobber in masked irq handling on BookE - Make embedded perf interrupt act as an NMI - Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want to retrigger an interrupt without preventing hard-enable v3: - Fix or vs. ori bug on Book3E - Fix enabling of interrupts for some exceptions on Book3E v4: - Fix resend of doorbells on return from interrupt on Book3E v5: - Rebased on top of my latest series, which involves some significant rework of some aspects of the patch. 
v6: - 32-bit compile fix - more compile fixes with various .config combos - factor out the asm code to soft-disable interrupts - remove the C wrapper around preempt_schedule_irq v7: - Fix a bug with hard irq state tracking on native power7 --- arch/powerpc/include/asm/exception-64s.h | 34 ++-- arch/powerpc/include/asm/hw_irq.h | 49 +++++- arch/powerpc/include/asm/irqflags.h | 37 ++-- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/dbell.c | 2 + arch/powerpc/kernel/entry_64.S | 153 ++++++++++++---- arch/powerpc/kernel/exceptions-64e.S | 221 ++++++++++++++++-------- arch/powerpc/kernel/exceptions-64s.S | 150 +++++++--------- arch/powerpc/kernel/head_64.S | 24 ++- arch/powerpc/kernel/idle.c | 6 +- arch/powerpc/kernel/idle_book3e.S | 25 +-- arch/powerpc/kernel/idle_power4.S | 24 ++- arch/powerpc/kernel/idle_power7.S | 23 ++- arch/powerpc/kernel/irq.c | 204 +++++++++++++++------- arch/powerpc/kernel/process.c | 3 + arch/powerpc/kernel/time.c | 8 +- arch/powerpc/platforms/pseries/processor_idle.c | 18 +- arch/powerpc/xmon/xmon.c | 4 +- 19 files changed, 647 insertions(+), 342 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 70354af0740..548da3aa0a3 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -232,23 +232,30 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) -#define __SOFTEN_TEST(h) \ +/* This associate vector numbers with bits in paca->irq_happened */ +#define SOFTEN_VALUE_0x500 PACA_IRQ_EE +#define SOFTEN_VALUE_0x502 PACA_IRQ_EE +#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC +#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC + +#define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ cmpwi r10,0; \ + li r10,SOFTEN_VALUE_##vec; \ beq masked_##h##interrupt -#define _SOFTEN_TEST(h) __SOFTEN_TEST(h) +#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ KVMTEST_PR(vec); \ - _SOFTEN_TEST(EXC_STD) + _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_TEST_HV(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_HV) + _SOFTEN_TEST(EXC_HV, vec) #define SOFTEN_TEST_HV_201(vec) \ KVMTEST(vec); \ - _SOFTEN_TEST(EXC_STD) + _SOFTEN_TEST(EXC_STD, vec) #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM; \ @@ -279,22 +286,7 @@ label##_hv: \ */ /* Exception addition: Hard disable interrupts */ -#ifdef CONFIG_TRACE_IRQFLAGS -#define DISABLE_INTS \ - lbz r10,PACASOFTIRQEN(r13); \ - li r11,0; \ - cmpwi cr0,r10,0; \ - stb r11,PACAHARDIRQEN(r13); \ - beq 44f; \ - stb r11,PACASOFTIRQEN(r13); \ - TRACE_DISABLE_INTS; \ -44: -#else -#define DISABLE_INTS \ - li r11,0; \ - stb r11,PACASOFTIRQEN(r13); \ - stb r11,PACAHARDIRQEN(r13) -#endif /* CONFIG_TRACE_IRQFLAGS */ +#define DISABLE_INTS SOFT_DISABLE_INTS(r10,r11) /* Exception addition: Keep interrupt state */ #define ENABLE_INTS \ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 6c6fa955baa..51010bfc792 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -11,6 +11,27 @@ #include #include +#ifdef CONFIG_PPC64 + +/* + * PACA flags in paca->irq_happened. + * + * This bits are set when interrupts occur while soft-disabled + * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS + * is set whenever we manually hard disable. 
+ */ +#define PACA_IRQ_HARD_DIS 0x01 +#define PACA_IRQ_DBELL 0x02 +#define PACA_IRQ_EE 0x04 +#define PACA_IRQ_DEC 0x08 /* Or FIT */ +#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ + +#endif /* CONFIG_PPC64 */ + +#ifndef __ASSEMBLY__ + +extern void __replay_interrupt(unsigned int vector); + extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 @@ -42,7 +63,6 @@ static inline unsigned long arch_local_irq_disable(void) } extern void arch_local_irq_restore(unsigned long); -extern void iseries_handle_interrupts(void); static inline void arch_local_irq_enable(void) { @@ -72,12 +92,24 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() \ - do { \ - __hard_irq_disable(); \ - get_paca()->soft_enabled = 0; \ - get_paca()->hard_enabled = 0; \ - } while(0) +static inline void hard_irq_disable(void) +{ + __hard_irq_disable(); + get_paca()->soft_enabled = 0; + get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; +} + +/* + * This is called by asynchronous interrupts to conditionally + * re-enable hard interrupts when soft-disabled after having + * cleared the source of the interrupt + */ +static inline void may_hard_irq_enable(void) +{ + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (!(get_paca()->irq_happened & PACA_IRQ_EE)) + __hard_irq_enable(); +} static inline bool arch_irq_disabled_regs(struct pt_regs *regs) { @@ -149,6 +181,8 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } +static inline void may_hard_irq_enable(void) { } + #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST @@ -159,5 +193,6 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) */ struct irq_chip; +#endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index b0b06d85788..6f9b6e23dc5 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -39,24 +39,31 @@ #define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) #define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) -#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) \ - cmpdi en,0; \ - bne 95f; \ - stb en,PACASOFTIRQEN(r13); \ - TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) \ - b skip; \ -95: TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) \ - li en,1; -#define TRACE_AND_RESTORE_IRQ(en) \ - TRACE_AND_RESTORE_IRQ_PARTIAL(en,96f); \ - stb en,PACASOFTIRQEN(r13); \ -96: +/* + * This is used by assembly code to soft-disable interrupts + */ +#define SOFT_DISABLE_INTS(__rA, __rB) \ + lbz __rA,PACASOFTIRQEN(r13); \ + lbz __rB,PACAIRQHAPPENED(r13); \ + cmpwi cr0,__rA,0; \ + li __rA,0; \ + ori __rB,__rB,PACA_IRQ_HARD_DIS; \ + stb __rB,PACAIRQHAPPENED(r13); \ + beq 44f; \ + stb __rA,PACASOFTIRQEN(r13); \ + TRACE_DISABLE_INTS; \ +44: + #else #define TRACE_ENABLE_INTS #define TRACE_DISABLE_INTS -#define TRACE_AND_RESTORE_IRQ_PARTIAL(en,skip) -#define TRACE_AND_RESTORE_IRQ(en) \ - stb en,PACASOFTIRQEN(r13) + +#define SOFT_DISABLE_INTS(__rA, __rB) \ + lbz __rA,PACAIRQHAPPENED(r13); \ + li __rB,0; \ + ori __rA,__rA,PACA_IRQ_HARD_DIS; \ + stb __rB,PACASOFTIRQEN(r13); \ + stb __rA,PACAIRQHAPPENED(r13) #endif #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 269c05a36d9..daf813fea91 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -132,7 +132,7 @@ struct 
paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 soft_enabled; /* irq soft-enable flag */ - u8 hard_enabled; /* set if irqs are enabled in MSR */ + u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 04caee7d9bc..cdd0d264415 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -147,7 +147,7 @@ int main(void) DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); - DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); + DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 2cc451aaaca..5b25c8060fd 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -37,6 +37,8 @@ void doorbell_exception(struct pt_regs *regs) irq_enter(); + may_hard_irq_enable(); + smp_ipi_demux(); irq_exit(); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c513beb78b3..f8a7a1a1a9f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * System calls. @@ -583,18 +584,72 @@ _GLOBAL(ret_from_except_lite) bne do_work #endif /* !CONFIG_PREEMPT */ + .globl fast_exc_return_irq +fast_exc_return_irq: restore: + /* + * This is the main kernel exit path, we first check if we + * have to change our interrupt state. + */ ld r5,SOFTE(r1) - TRACE_AND_RESTORE_IRQ(r5); + lbz r6,PACASOFTIRQEN(r13) + cmpwi cr1,r5,0 + cmpw cr0,r5,r6 + beq cr0,4f + + /* We do, handle disable first, which is easy */ + bne cr1,3f; + li r0,0 + stb r0,PACASOFTIRQEN(r13); + TRACE_DISABLE_INTS + b 4f - /* extract EE bit and use it to restore paca->hard_enabled */ - ld r3,_MSR(r1) - rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ - stb r4,PACAHARDIRQEN(r13) +3: /* + * We are about to soft-enable interrupts (we are hard disabled + * at this point). We check if there's anything that needs to + * be replayed first. + */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bne- restore_check_irq_replay + + /* + * Get here when nothing happened while soft-disabled, just + * soft-enable and move-on. We will hard-enable as a side + * effect of rfi + */ +restore_no_replay: + TRACE_ENABLE_INTS + li r0,1 + stb r0,PACASOFTIRQEN(r13); + /* + * Final return path. BookE is handled in a different file + */ +4: #ifdef CONFIG_PPC_BOOK3E b .exception_return_book3e #else + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + /* + * Some code path such as load_up_fpu or altivec return directly + * here. They run entirely hard disabled and do not alter the + * interrupt state. 
They also don't use lwarx/stwcx. and thus + * are known not to leave dangling reservations. + */ + .globl fast_exception_return +fast_exception_return: + ld r3,_MSR(r1) ld r4,_CTR(r1) ld r0,_LINK(r1) mtctr r4 @@ -607,17 +662,6 @@ restore: andi. r0,r3,MSR_RI beq- unrecov_restore - /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r4,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* * Clear RI before restoring r13. If we are returning to * userspace and we take an exception after restoring r13, @@ -629,7 +673,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) /* * r13 is our per cpu area, only restore it if we are returning to - * userspace + * userspace the value stored in the stack frame may belong to + * another CPU. */ andi. r0,r3,MSR_PR beq 1f @@ -654,6 +699,55 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) #endif /* CONFIG_PPC_BOOK3E */ + /* + * Something did happen, check if a re-emit is needed + * (this also clears paca->irq_happened) + */ +restore_check_irq_replay: + /* XXX: We could implement a fast path here where we check + * for irq_happened being just 0x01, in which case we can + * clear it and return. That means that we would potentially + * miss a decrementer having wrapped all the way around. + * + * Still, this might be useful for things like hash_page + */ + bl .__check_irq_replay + cmpwi cr0,r3,0 + beq restore_no_replay + + /* + * We need to re-emit an interrupt. We do so by re-using our + * existing exception frame. We first change the trap value, + * but we need to ensure we preserve the low nibble of it + */ + ld r4,_TRAP(r1) + clrldi r4,r4,60 + or r4,r4,r3 + std r4,_TRAP(r1) + + /* + * Then find the right handler and call it. Interrupts are + * still soft-disabled and we keep them that way. + */ + cmpwi cr0,r3,0x500 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .do_IRQ + b .ret_from_except +1: cmpwi cr0,r3,0x900 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .timer_interrupt + b .ret_from_except +#ifdef CONFIG_PPC_BOOK3E +1: cmpwi cr0,r3,0x280 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl .doorbell_exception + b .ret_from_except +#endif /* CONFIG_PPC_BOOK3E */ +1: b .ret_from_except /* What else to do here ? */ + do_work: #ifdef CONFIG_PREEMPT andi. r0,r3,MSR_PR /* Returning to user mode? */ @@ -666,18 +760,11 @@ do_work: crandc eq,cr1*4+eq,eq bne restore - /* Here we are preempting the current task. - * - * Ensure interrupts are soft-disabled. We also properly mark - * the PACA to reflect the fact that they are hard-disabled - * and trace the change + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - TRACE_DISABLE_INTS - - /* Call the scheduler with soft IRQs off */ + SOFT_DISABLE_INTS(r3,r4) 1: bl .preempt_schedule_irq /* Hard-disable interrupts again (and update PACA) */ @@ -687,8 +774,8 @@ do_work: ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ - li r0,0 - stb r0,PACAHARDIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) /* Re-test flags and eventually loop */ clrrdi r9,r1,THREAD_SHIFT @@ -710,14 +797,12 @@ user_work: andi. 
r0,r4,_TIF_NEED_RESCHED beq 1f - li r5,1 - TRACE_AND_RESTORE_IRQ(r5); + bl .restore_interrupts bl .schedule b .ret_from_except_lite 1: bl .save_nvgprs - li r5,1 - TRACE_AND_RESTORE_IRQ(r5); + bl .restore_interrupts addi r3,r1,STACK_FRAME_OVERHEAD bl .do_notify_resume b .ret_from_except diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index c4c34665c22..7215cc2495d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -24,6 +24,7 @@ #include #include #include +#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... @@ -77,59 +78,55 @@ #define SPRN_MC_SRR1 SPRN_MCSRR1 #define NORMAL_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, GEN, addition##_GEN) + EXCEPTION_PROLOG(n, GEN, addition##_GEN(n)) #define CRIT_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, CRIT, addition##_CRIT) + EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n)) #define DBG_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, DBG, addition##_DBG) + EXCEPTION_PROLOG(n, DBG, addition##_DBG(n)) #define MC_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, MC, addition##_MC) + EXCEPTION_PROLOG(n, MC, addition##_MC(n)) /* Variants of the "addition" argument for the prolog */ -#define PROLOG_ADDITION_NONE_GEN -#define PROLOG_ADDITION_NONE_CRIT -#define PROLOG_ADDITION_NONE_DBG -#define PROLOG_ADDITION_NONE_MC +#define PROLOG_ADDITION_NONE_GEN(n) +#define PROLOG_ADDITION_NONE_CRIT(n) +#define PROLOG_ADDITION_NONE_DBG(n) +#define PROLOG_ADDITION_NONE_MC(n) -#define PROLOG_ADDITION_MASKABLE_GEN \ +#define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ cmpwi cr0,r11,0; /* yes -> go out of line */ \ - beq masked_interrupt_book3e; + beq masked_interrupt_book3e_##n -#define PROLOG_ADDITION_2REGS_GEN \ +#define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) -#define PROLOG_ADDITION_1REG_GEN \ +#define PROLOG_ADDITION_1REG_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); -#define PROLOG_ADDITION_2REGS_CRIT \ +#define PROLOG_ADDITION_2REGS_CRIT(n) \ std r14,PACA_EXCRIT+EX_R14(r13); \ std r15,PACA_EXCRIT+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_DBG \ +#define PROLOG_ADDITION_2REGS_DBG(n) \ std r14,PACA_EXDBG+EX_R14(r13); \ std r15,PACA_EXDBG+EX_R15(r13) -#define PROLOG_ADDITION_2REGS_MC \ +#define PROLOG_ADDITION_2REGS_MC(n) \ std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) -#define PROLOG_ADDITION_DOORBELL_GEN \ - lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r11,0; /* yes -> go out of line */ \ - beq masked_doorbell_book3e - /* Core exception code for all exceptions except TLB misses. * XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ #define EXCEPTION_COMMON(n, excf, ints) \ +exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ @@ -167,20 +164,21 @@ std r0,RESULT(r1); /* clear regs->result */ \ ints; -/* Variants for the "ints" argument */ +/* Variants for the "ints" argument. 
This one does nothing when we want + * to keep interrupts in their original state + */ #define INTS_KEEP -#define INTS_DISABLE_SOFT \ - stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \ - TRACE_DISABLE_INTS; -#define INTS_DISABLE_HARD \ - stb r0,PACAHARDIRQEN(r13); /* and hard disabled */ -#define INTS_DISABLE_ALL \ - INTS_DISABLE_SOFT \ - INTS_DISABLE_HARD - -/* This is called by exceptions that used INTS_KEEP (that is did not clear - * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE - * to it's previous value + +/* This second version is meant for exceptions that don't immediately + * hard-enable. We set a bit in paca->irq_happened to ensure that + * a subsequent call to arch_local_irq_restore() will properly + * hard-enable and avoid the fast-path + */ +#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4) + +/* This is called by exceptions that used INTS_KEEP (that did not touch + * irq indicators in the PACA). This will restore MSR:EE to it's previous + * value * * XXX In the long run, we may want to open-code it in order to separate the * load from the wrtee, thus limiting the latency caused by the dependency @@ -238,7 +236,7 @@ exc_##n##_bad_stack: \ #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ - EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ + EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -289,7 +287,7 @@ interrupt_end_book3e: /* Critical Input Interrupt */ START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -300,7 +298,7 @@ interrupt_end_book3e: /* Machine Check Interrupt */ START_EXCEPTION(machine_check); CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD // CHECK_NAPPING(); @@ -313,7 +311,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) b storage_fault_common /* Instruction Storage Interrupt */ @@ -321,7 +319,7 @@ interrupt_end_book3e: NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) b storage_fault_common /* External Input Interrupt */ @@ -339,12 +337,11 @@ interrupt_end_book3e: START_EXCEPTION(program); NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT) + EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) bl .save_nvgprs - INTS_RESTORE_HARD bl .program_check_exception b .ret_from_except @@ -358,7 +355,7 @@ interrupt_end_book3e: beq- 1f bl .load_up_fpu b fast_exception_return -1: INTS_DISABLE_ALL +1: INTS_DISABLE bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_fp_unavailable_exception @@ -373,7 +370,7 @@ interrupt_end_book3e: /* Watchdog Timer Interrupt */ START_EXCEPTION(watchdog); 
CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) +// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -392,7 +389,7 @@ interrupt_end_book3e: /* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) - EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .unknown_exception @@ -450,7 +447,7 @@ interrupt_end_book3e: mfspr r15,SPRN_SPRG_CRIT_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -465,7 +462,7 @@ kernel_dbg_exc: /* Debug exception as a debug interrupt*/ START_EXCEPTION(debug_debug); - DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS) /* * If there is a single step or branch-taken exception in an @@ -515,7 +512,7 @@ kernel_dbg_exc: mfspr r15,SPRN_SPRG_DBG_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR - EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -525,21 +522,20 @@ kernel_dbg_exc: bl .DebugException b .ret_from_except - MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) - -/* Doorbell interrupt */ - START_EXCEPTION(doorbell) - NORMAL_EXCEPTION_PROLOG(0x2070, PROLOG_ADDITION_DOORBELL) - EXCEPTION_COMMON(0x2070, PACA_EXGEN, INTS_DISABLE_ALL) - CHECK_NAPPING() + START_EXCEPTION(perfmon); + NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) addi r3,r1,STACK_FRAME_OVERHEAD - bl .doorbell_exception + bl .performance_monitor_exception b .ret_from_except_lite +/* Doorbell interrupt */ + MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE) + /* Doorbell critical Interrupt */ START_EXCEPTION(doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) + CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addi r3,r1,STACK_FRAME_OVERHEAD @@ -547,36 +543,114 @@ kernel_dbg_exc: // b ret_from_crit_except b . +/* Guest Doorbell */ MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) - MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) +/* Guest Doorbell critical Interrupt */ + START_EXCEPTION(guest_doorbell_crit); + CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE) +// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) +// bl special_reg_save_crit +// CHECK_NAPPING(); +// addi r3,r1,STACK_FRAME_OVERHEAD +// bl .guest_doorbell_critical_exception +// b ret_from_crit_except + b . 
+ +/* Hypervisor call */ + START_EXCEPTION(hypercall); + NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + +/* Embedded Hypervisor priviledged */ + START_EXCEPTION(ehpriv); + NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except /* - * An interrupt came in while soft-disabled; clear EE in SRR1, - * clear paca->hard_enabled and return. + * An interrupt came in while soft-disabled; We mark paca->irq_happened + * accordingly and if the interrupt is level sensitive, we hard disable */ -masked_doorbell_book3e: - mtcr r10 - /* Resend the doorbell to fire again when ints enabled */ - mfspr r10,SPRN_PIR - PPC_MSGSND(r10) - b masked_interrupt_book3e_common -masked_interrupt_book3e: +masked_interrupt_book3e_0x500: + /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */ + li r11,PACA_IRQ_EE + b masked_interrupt_book3e_full_mask + +masked_interrupt_book3e_0x900: + ACK_DEC(r11); + li r11,PACA_IRQ_DEC + b masked_interrupt_book3e_no_mask +masked_interrupt_book3e_0x980: + ACK_FIT(r11); + li r11,PACA_IRQ_DEC + b masked_interrupt_book3e_no_mask +masked_interrupt_book3e_0x280: +masked_interrupt_book3e_0x2c0: + li r11,PACA_IRQ_DBELL + b masked_interrupt_book3e_no_mask + +masked_interrupt_book3e_no_mask: mtcr r10 -masked_interrupt_book3e_common: - stb r11,PACAHARDIRQEN(r13) + lbz r10,PACAIRQHAPPENED(r13) + or r10,r10,r11 + stb r10,PACAIRQHAPPENED(r13) + b 1f +masked_interrupt_book3e_full_mask: + mtcr r10 + lbz r10,PACAIRQHAPPENED(r13) + or r10,r10,r11 + stb r10,PACAIRQHAPPENED(r13) mfspr r10,SPRN_SRR1 rldicl r11,r10,48,1 /* clear MSR_EE */ rotldi r10,r11,16 mtspr SPRN_SRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */ +1: ld r10,PACA_EXGEN+EX_R10(r13); ld r11,PACA_EXGEN+EX_R11(r13); mfspr r13,SPRN_SPRG_GEN_SCRATCH; rfi b . +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 + * to indicate the kind of interrupt. MSR:EE is already off. + * We generate a stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. + */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about. 
+ */ + mflr r10 + mfmsr r11 + mfcr r4 + mtspr SPRN_SPRG_GEN_SCRATCH,r13; + std r1,PACA_EXGEN+EX_R1(r13); + stw r4,PACA_EXGEN+EX_CR(r13); + ori r11,r11,MSR_EE + subi r1,r1,INT_FRAME_SIZE; + cmpwi cr0,r3,0x500 + beq exc_0x500_common + cmpwi cr0,r3,0x900 + beq exc_0x900_common + cmpwi cr0,r3,0x280 + beq exc_0x280_common + blr + /* * This is called from 0x300 and 0x400 handlers after the prologs with @@ -679,6 +753,8 @@ BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x280) +BAD_STACK_TRAMPOLINE(0x2a0) BAD_STACK_TRAMPOLINE(0x2c0) BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) @@ -696,11 +772,10 @@ BAD_STACK_TRAMPOLINE(0xa00) BAD_STACK_TRAMPOLINE(0xb00) BAD_STACK_TRAMPOLINE(0xc00) BAD_STACK_TRAMPOLINE(0xd00) +BAD_STACK_TRAMPOLINE(0xd08) BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) -BAD_STACK_TRAMPOLINE(0x2070) -BAD_STACK_TRAMPOLINE(0x2080) .globl bad_stack_book3e bad_stack_book3e: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 02448ea58ad..2d0868a4e2f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -12,6 +12,7 @@ * */ +#include #include #include @@ -356,34 +357,60 @@ do_stab_bolted_pSeries: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) /* - * An interrupt came in while soft-disabled; clear EE in SRR1, - * clear paca->hard_enabled and return. + * An interrupt came in while soft-disabled. We set paca->irq_happened, + * then, if it was a decrementer interrupt, we bump the dec to max and + * and return, else we hard disable and return. This is called with + * r10 containing the value to OR to the paca field. */ -masked_interrupt: - stb r10,PACAHARDIRQEN(r13) - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - mfspr r10,SPRN_SRR1 - rldicl r10,r10,48,1 /* clear MSR_EE */ - rotldi r10,r10,16 - mtspr SPRN_SRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13) - GET_SCRATCH0(r13) - rfid +#define MASKED_INTERRUPT(_H) \ +masked_##_H##interrupt: \ + std r11,PACA_EXGEN+EX_R11(r13); \ + lbz r11,PACAIRQHAPPENED(r13); \ + or r11,r11,r10; \ + stb r11,PACAIRQHAPPENED(r13); \ + andi. r10,r10,PACA_IRQ_DEC; \ + beq 1f; \ + lis r10,0x7fff; \ + ori r10,r10,0xffff; \ + mtspr SPRN_DEC,r10; \ + b 2f; \ +1: mfspr r10,SPRN_##_H##SRR1; \ + rldicl r10,r10,48,1; /* clear MSR_EE */ \ + rotldi r10,r10,16; \ + mtspr SPRN_##_H##SRR1,r10; \ +2: mtcrf 0x80,r9; \ + ld r9,PACA_EXGEN+EX_R9(r13); \ + ld r10,PACA_EXGEN+EX_R10(r13); \ + ld r11,PACA_EXGEN+EX_R11(r13); \ + GET_SCRATCH0(r13); \ + ##_H##rfid; \ b . + + MASKED_INTERRUPT() + MASKED_INTERRUPT(H) -masked_Hinterrupt: - stb r10,PACAHARDIRQEN(r13) - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - mfspr r10,SPRN_HSRR1 - rldicl r10,r10,48,1 /* clear MSR_EE */ - rotldi r10,r10,16 - mtspr SPRN_HSRR1,r10 - ld r10,PACA_EXGEN+EX_R10(r13) - GET_SCRATCH0(r13) - hrfid - b . +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains 0x500 or 0x900 to indicate which + * kind of interrupt. MSR:EE is already off. We generate a + * stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. 
+ */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about, so we don't bother storing them. + */ + mfmsr r12 + mflr r11 + mfcr r9 + ori r12,r12,MSR_EE + andi. r3,r3,0x0800 + bne decrementer_common + b hardware_interrupt_common #ifdef CONFIG_PPC_PSERIES /* @@ -793,7 +820,8 @@ vsx_unavailable_common: EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) #ifdef CONFIG_VSX BEGIN_FTR_SECTION - bne .load_up_vsx + beq 1f + b .load_up_vsx 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif @@ -807,65 +835,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) .globl __end_handlers __end_handlers: -/* - * Return from an exception with minimal checks. - * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. - * If interrupts have been enabled, or anything has been - * done that might have changed the scheduling status of - * any task or sent any task a signal, you should use - * ret_from_except or ret_from_except_lite instead of this. - */ -fast_exc_return_irq: /* restores irq state too */ - ld r3,SOFTE(r1) - TRACE_AND_RESTORE_IRQ(r3); - ld r12,_MSR(r1) - rldicl r4,r12,49,63 /* get MSR_EE to LSB */ - stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */ - b 1f - - .globl fast_exception_return -fast_exception_return: - ld r12,_MSR(r1) -1: ld r11,_NIP(r1) - andi. r3,r12,MSR_RI /* check if RI is set */ - beq- unrecov_fer - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - andi. r3,r12,MSR_PR - beq 2f - ACCOUNT_CPU_USER_EXIT(r3, r4) -2: -#endif - - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - mtcr r3 - mtlr r4 - mtctr r5 - mtxer r6 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - - ld r10,PACAKMSR(r13) - clrrdi r10,r10,2 /* clear RI */ - mtmsrd r10,1 - - mtspr SPRN_SRR1,r12 - mtspr SPRN_SRR0,r11 - REST_4GPRS(10, r1) - ld r1,GPR1(r1) - rfid - b . /* prevent speculative execution */ - -unrecov_fer: - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - /* * Hash table stuff */ @@ -905,19 +874,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) * r4 contains the required access permissions * r5 contains the trap number * - * at return r3 = 0 for success + * at return r3 = 0 for success, 1 for page fault, negative for error */ bl .hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ - /* - * Here we have interrupts hard-disabled, so it is sufficient - * to restore paca->{soft,hard}_enable and get out. - */ + /* Success */ beq fast_exc_return_irq /* Return from exception on success */ - /* For a hash failure, we don't bother re-enabling interrupts */ - ble- 13f + /* Error */ + blt- 13f /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 40759fbfb17..58bddee8e1e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -38,6 +38,7 @@ #include #include #include +#include /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -550,7 +551,8 @@ _GLOBAL(pmac_secondary_start) */ li r0,0 stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) /* Create a temp kernel stack for use before relocation is on. 
*/ ld r1,PACAEMERGSP(r13) @@ -601,9 +603,12 @@ __secondary_start: li r7,0 mtlr r7 - /* Mark interrupts both hard and soft disabled */ - stb r7,PACAHARDIRQEN(r13) + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ stb r7,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) /* enable MMU and jump to start_secondary */ LOAD_REG_ADDR(r3, .start_secondary_prolog) @@ -750,13 +755,18 @@ _INIT_GLOBAL(start_here_common) /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) + /* Do more system initializations in virtual mode */ bl .setup_system - /* Load up the kernel context */ -5: li r5,0 - stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ - stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + /* Generic kernel entry */ bl .start_kernel /* Not reached */ diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 0a48bf5db6c..8f7a2b62863 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -84,7 +84,11 @@ void cpu_idle(void) start_critical_timings(); - local_irq_enable(); + /* Some power_save functions return with + * interrupts enabled, some don't. + */ + if (irqs_disabled()) + local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); } else { diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 16c002d6bdf..ff007b59448 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -29,43 +29,30 @@ _GLOBAL(book3e_idle) wrteei 0 /* Now check if an interrupt came in while we were soft disabled - * since we may otherwise lose it (doorbells etc...). We know - * that since PACAHARDIRQEN will have been cleared in that case. + * since we may otherwise lose it (doorbells etc...). */ - lbz r3,PACAHARDIRQEN(r13) + lbz r3,PACAIRQHAPPENED(r13) cmpwi cr0,r3,0 - beqlr + bnelr - /* Now we are going to mark ourselves as soft and hard enables in + /* Now we are going to mark ourselves as soft and hard enabled in * order to be able to take interrupts while asleep. We inform lockdep * of that. We don't actually turn interrupts on just yet tho. 
*/ #ifdef CONFIG_TRACE_IRQFLAGS stdu r1,-128(r1) bl .trace_hardirqs_on + addi r1,r1,128 #endif li r0,1 stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) /* Interrupts will make use return to LR, so get something we want * in there */ bl 1f - /* Hard disable interrupts again */ - wrteei 0 - - /* Mark them off again in the PACA as well */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - - /* Tell lockdep about it */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl .trace_hardirqs_off - addi r1,r1,128 -#endif + /* And return (interrupts are on) */ ld r0,16(r1) mtlr r0 blr diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index ba319547860..d8cdba4c28b 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -14,6 +14,7 @@ #include #include #include +#include #undef DEBUG @@ -29,14 +30,31 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) cmpwi 0,r4,0 beqlr - /* Go to NAP now */ + /* Hard disable interrupts */ mfmsr r7 rldicl r0,r7,48,1 rotldi r0,r0,16 - mtmsrd r0,1 /* hard-disable interrupts */ + mtmsrd r0,1 + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bnelr + + /* Soft-enable interrupts */ +#ifdef CONFIG_TRACE_IRQFLAGS + mflr r0 + std r0,16(r1) + stdu r1,-128(r1) + bl .trace_hardirqs_on + addi r1,r1,128 + ld r0,16(r1) + mtlr r0 +#endif /* CONFIG_TRACE_IRQFLAGS */ + + TRACE_ENABLE_INTS li r0,1 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ - stb r0,PACAHARDIRQEN(r13) BEGIN_FTR_SECTION DSSALL sync diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index fcdff198da4..0cdc9a39283 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -1,5 +1,5 @@ /* - * This file contains the power_save function for 970-family CPUs. + * This file contains the power_save function for Power7 CPUs. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,6 +15,7 @@ #include #include #include +#include #undef DEBUG @@ -51,9 +52,25 @@ _GLOBAL(power7_idle) rldicl r9,r9,48,1 rotldi r9,r9,16 mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... 
yet */ li r0,0 - stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ - stb r0,PACAHARDIRQEN(r13) stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9b6e80668cf..eb804e15b29 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -95,14 +95,14 @@ extern int tau_interrupts(int); int distribute_irqs = 1; -static inline notrace unsigned long get_hard_enabled(void) +static inline notrace unsigned long get_irq_happened(void) { - unsigned long enabled; + unsigned long happened; __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled))); + : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - return enabled; + return happened; } static inline notrace void set_soft_enabled(unsigned long enable) @@ -111,88 +111,167 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -static inline notrace void decrementer_check_overflow(void) +static inline notrace int decrementer_check_overflow(void) { - u64 now = get_tb_or_rtc(); - u64 *next_tb; - - preempt_disable(); - next_tb = &__get_cpu_var(decrementers_next_tb); - + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + if (now >= *next_tb) set_dec(1); - preempt_enable(); + return now >= *next_tb; } -notrace void arch_local_irq_restore(unsigned long en) +/* This is called whenever we are re-enabling interrupts + * and returns either 0 (nothing to do) or 500/900 if there's + * either an EE or a DEC to generate. + * + * This is called in two contexts: From arch_local_irq_restore() + * before soft-enabling interrupts, and from the exception exit + * path when returning from an interrupt from a soft-disabled to + * a soft enabled context. In both case we have interrupts hard + * disabled. + * + * We take care of only clearing the bits we handled in the + * PACA irq_happened field since we can only re-emit one at a + * time and we don't want to "lose" one. + */ +notrace unsigned int __check_irq_replay(void) { /* - * get_paca()->soft_enabled = en; - * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1? - * That was allowed before, and in such a case we do need to take care - * that gcc will set soft_enabled directly via r13, not choose to use - * an intermediate register, lest we're preempted to a different cpu. + * We use local_paca rather than get_paca() to avoid all + * the debug_smp_processor_id() business in this low level + * function */ - set_soft_enabled(en); - if (!en) - return; + unsigned char happened = local_paca->irq_happened; -#ifdef CONFIG_PPC_STD_MMU_64 - if (firmware_has_feature(FW_FEATURE_ISERIES)) { - /* - * Do we need to disable preemption here? Not really: in the - * unlikely event that we're preempted to a different cpu in - * between getting r13, loading its lppaca_ptr, and loading - * its any_int, we might call iseries_handle_interrupts without - * an interrupt pending on the new cpu, but that's no disaster, - * is it? And the business of preempting us off the old cpu - * would itself involve a local_irq_restore which handles the - * interrupt to that cpu. - * - * But use "local_paca->lppaca_ptr" instead of "get_lppaca()" - * to avoid any preemption checking added into get_paca(). 
- */ - if (local_paca->lppaca_ptr->int_dword.any_int) - iseries_handle_interrupts(); + /* Clear bit 0 which we wouldn't clear otherwise */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } -#endif /* CONFIG_PPC_STD_MMU_64 */ /* - * if (get_paca()->hard_enabled) return; - * But again we need to take care that gcc gets hard_enabled directly - * via r13, not choose to use an intermediate register, lest we're - * preempted to a different cpu in between the two instructions. + * We may have missed a decrementer interrupt. We check the + * decrementer itself rather than the paca irq_happened field + * in case we also had a rollover while hard disabled + */ + local_paca->irq_happened &= ~PACA_IRQ_DEC; + if (decrementer_check_overflow()) + return 0x900; + + /* Finally check if an external interrupt happened */ + local_paca->irq_happened &= ~PACA_IRQ_EE; + if (happened & PACA_IRQ_EE) + return 0x500; + +#ifdef CONFIG_PPC_BOOK3E + /* Finally check if an EPR external interrupt happened + * this bit is typically set if we need to handle another + * "edge" interrupt from within the MPIC "EPR" handler + */ + local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; + if (happened & PACA_IRQ_EE_EDGE) + return 0x500; + + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (happened & PACA_IRQ_DBELL) + return 0x280; +#endif /* CONFIG_PPC_BOOK3E */ + + /* There should be nothing left ! */ + BUG_ON(local_paca->irq_happened != 0); + + return 0; +} + +notrace void arch_local_irq_restore(unsigned long en) +{ + unsigned char irq_happened; + unsigned int replay; + + /* Write the new soft-enabled value */ + set_soft_enabled(en); + if (!en) + return; + /* + * From this point onward, we can take interrupts, preempt, + * etc... unless we got hard-disabled. We check if an event + * happened. If none happened, we know we can just return. + * + * We may have preempted before the check below, in which case + * we are checking the "new" CPU instead of the old one. This + * is only a problem if an event happened on the "old" CPU. + * + * External interrupt events on non-iseries will have caused + * interrupts to be hard-disabled, so there is no problem, we + * cannot have preempted. + * + * That leaves us with EEs on iSeries or decrementer interrupts, + * which I decided to safely ignore. The preemption would have + * itself been the result of an interrupt, upon which return we + * will have checked for pending events on the old CPU. */ - if (get_hard_enabled()) + irq_happened = get_irq_happened(); + if (!irq_happened) return; /* - * Need to hard-enable interrupts here. Since currently disabled, - * no need to take further asm precautions against preemption; but - * use local_paca instead of get_paca() to avoid preemption checking. + * We need to hard disable to get a trusted value from + * __check_irq_replay(). We also need to soft-disable + * again to avoid warnings in there due to the use of + * per-cpu variables. + * + * We know that if the value in irq_happened is exactly 0x01 + * then we are already hard disabled (there are other less + * common cases that we'll ignore for now), so we skip the + * (expensive) mtmsrd. */ - local_paca->hard_enabled = en; + if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) + __hard_irq_disable(); + set_soft_enabled(0); /* - * Trigger the decrementer if we have a pending event. 
Some processors - * only trigger on edge transitions of the sign bit. We might also - * have disabled interrupts long enough that the decrementer wrapped - * to positive. + * Check if anything needs to be re-emitted. We haven't + * soft-enabled yet to avoid warnings in decrementer_check_overflow + * accessing per-cpu variables */ - decrementer_check_overflow(); + replay = __check_irq_replay(); + + /* We can soft-enable now */ + set_soft_enabled(1); /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. + * And replay if we have to. This will return with interrupts + * hard-enabled. */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); + if (replay) { + __replay_interrupt(replay); + return; } + /* Finally, let's ensure we are hard enabled */ __hard_irq_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is specifically called by assembly code to re-enable interrupts + * if they are currently disabled. This is typically called before + * schedule() or do_signal() when returning to userspace. We do it + * in C to avoid the burden of dealing with lockdep etc... + */ +void restore_interrupts(void) +{ + if (irqs_disabled()) + local_irq_enable(); +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) @@ -360,8 +439,17 @@ void do_IRQ(struct pt_regs *regs) check_stack_overflow(); + /* + * Query the platform PIC for the interrupt & ack it. + * + * This will typically lower the interrupt line to the CPU + */ irq = ppc_md.get_irq(); + /* We can hard enable interrupts now */ + may_hard_irq_enable(); + + /* And finally process it */ if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bf80a1d5f8f..e40707032ac 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -647,6 +647,9 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld\n", regs->softe); +#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) printk("CFAR: "REG"\n", regs->orig_gpr3); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 567dd7c3ac2..f81c81b92f0 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -259,7 +259,6 @@ void accumulate_stolen_time(void) u64 sst, ust; u8 save_soft_enabled = local_paca->soft_enabled; - u8 save_hard_enabled = local_paca->hard_enabled; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -268,7 +267,6 @@ void accumulate_stolen_time(void) * complain */ local_paca->soft_enabled = 0; - local_paca->hard_enabled = 0; sst = scan_dispatch_log(local_paca->starttime_user); ust = scan_dispatch_log(local_paca->starttime); @@ -277,7 +275,6 @@ void accumulate_stolen_time(void) local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; - local_paca->hard_enabled = save_hard_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) @@ -580,6 +577,11 @@ void timer_interrupt(struct pt_regs * regs) if (!cpu_online(smp_processor_id())) return; + /* Conditionally hard-enable interrupts now that the DEC has been + * bumped to its maximum value + */ + may_hard_irq_enable(); + trace_timer_interrupt_entry(regs); 
__get_cpu_var(irq_stat).timer_irqs++; diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 085fd3f45ad..a12e95af693 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -96,6 +96,20 @@ out: return index; } +static void check_and_cede_processor(void) +{ + /* + * Interrupts are soft-disabled at this point, + * but not hard disabled. So an interrupt might have + * occurred before entering NAP, and would be potentially + * lost (edge events, decrementer events, etc...) unless + * we first hard disable then check. + */ + hard_irq_disable(); + if (get_paca()->irq_happened == 0) + cede_processor(); +} + static int dedicated_cede_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -108,7 +122,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev, ppc64_runlatch_off(); HMT_medium(); - cede_processor(); + check_and_cede_processor(); get_lppaca()->donate_dedicated_cpu = 0; dev->last_residency = @@ -132,7 +146,7 @@ static int shared_cede_loop(struct cpuidle_device *dev, * processor. When returning here, external interrupts * are enabled. */ - cede_processor(); + check_and_cede_processor(); dev->last_residency = (int)idle_loop_epilog(in_purr, kt_before); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 63846ebd327..974a47b3c9b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1437,8 +1437,8 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 - printf(" paca = 0x%lx\t softe: %d\t harde: %d\n", - local_paca, local_paca->soft_enabled, local_paca->hard_enabled); + printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", + local_paca, local_paca->soft_enabled, local_paca->irq_happened); #endif if (current) { printf(" pid = %ld, comm = %s\n", -- cgit v1.2.3-70-g09d2 From f0b8b3417d836f89d873f6d5de43d54f02cb11e2 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 5 Jan 2012 12:37:16 -0600 Subject: powerpc/fsl-booke: Fixup calc_cam_sz to support MMU v2 The registers that describe size supported by TLB are different on MMU v2 as well as we support power of two page sizes. For now we continue to assume that FSL variable size array supports all page sizes up to the maximum one reported in TLB1PS. 
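To make the two conversions concrete, here is a minimal stand-alone sketch (illustrative only, not code from the patch: the TLB1CFG/TLB1PS semantics follow the description above, while the helper name and signature are invented). It returns the maximum variable page size as log2(bytes):

static unsigned int max_cam_log2_bytes(unsigned int tlb1cfg_maxsize,
                                       unsigned int tlb1ps, int mmu_v1)
{
        if (mmu_v1)
                /* MAXSIZE counts powers of 4 KB: e.g. 11 -> 4^11 KB = 2^32 B = 4 GB */
                return tlb1cfg_maxsize * 2 + 10;

        /* TLB1PS is a bitmask of supported 2^n KB sizes:
         * e.g. bit 20 set -> 2^20 KB = 2^30 B = 1 GB
         */
        return (31 - __builtin_clz(tlb1ps)) + 10;
}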
Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/reg_booke.h | 1 + arch/powerpc/mm/fsl_booke_mmu.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 500fe1dc43e..8a97aa7289d 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -62,6 +62,7 @@ #define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */ #define SPRN_MAS8 0x155 /* MMU Assist Register 8 */ #define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */ +#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */ #define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */ #define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */ #define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */ diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 66a6fd38e9c..07ba45b0f07 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -149,12 +149,19 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, phys_addr_t phys) { - unsigned int camsize = __ilog2(ram) & ~1U; - unsigned int align = __ffs(virt | phys) & ~1U; - unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; - - /* Convert (4^max) kB to (2^max) bytes */ - max_cam = max_cam * 2 + 10; + unsigned int camsize = __ilog2(ram); + unsigned int align = __ffs(virt | phys); + unsigned long max_cam; + + if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10; + camsize &= ~1U; + align &= ~1U; + } else { + /* Convert (2^max) kB to (2^max) bytes */ + max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10; + } if (camsize > align) camsize = align; -- cgit v1.2.3-70-g09d2 From 10241842fbe900276634fee8d37ec48a7d8a762f Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sun, 6 Nov 2011 11:51:07 -0600 Subject: powerpc: Add initial e6500 cpu support Add basic support for e6500 core in its single threaded mode. 
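For context, the new cputable entry below is picked up by the usual PVR match (a sketch under that assumption; the helper is hypothetical, the mask and value are the ones added in the diff):

/* Any e6500 revision, e.g. PVR 0x80400120, satisfies this check. */
static int pvr_is_e6500(unsigned int pvr)
{
        return (pvr & 0xffff0000) == 0x80400000;    /* pvr_mask / pvr_value */
}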
Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/cputable.h | 12 ++++++++---- arch/powerpc/kernel/cputable.c | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index ad55a1ccb9f..b9219e99bd2 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -390,6 +390,10 @@ extern const char *powerpc_base_platform; CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC) +#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ + CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ @@ -442,7 +446,7 @@ extern const char *powerpc_base_platform; #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500 | CPU_FTRS_A2) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ @@ -483,7 +487,7 @@ enum { #endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC | - CPU_FTRS_E5500 | + CPU_FTRS_E5500 | CPU_FTRS_E6500 | #endif 0, }; @@ -491,7 +495,7 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500 & CPU_FTRS_A2) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ @@ -528,7 +532,7 @@ enum { #endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC & - CPU_FTRS_E5500 & + CPU_FTRS_E5500 & CPU_FTRS_E6500 & #endif CPU_FTRS_POSSIBLE, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 81db9e2a8a2..4dccf51064e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2019,6 +2019,24 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce5500", }, + { /* e6500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80400000, + .cpu_name = "e6500", + .cpu_features = CPU_FTRS_E6500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e6500", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce6500", + }, #ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, -- cgit v1.2.3-70-g09d2 From c141b38f8643b46701c9db819909120c5b667a26 Mon Sep 17 00:00:00 2001 From: Zhicheng Fan Date: Wed, 22 Feb 2012 13:44:07 +0800 Subject: powerpc/85xx: Abstract common define of signal multiplex control for qe The mpc85xx_rdb and mpc85xx_mds have commom define of signal multiplex for qe, so they need to go in common header, the patch abstract them to fsl_guts.h Signed-off-by: Zhicheng Fan Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/fsl_guts.h | 6 +++++- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 35 +++++++++++++------------------ 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fsl_guts.h 
b/arch/powerpc/include/asm/fsl_guts.h index bebd12463ec..ce04530d200 100644 --- a/arch/powerpc/include/asm/fsl_guts.h +++ b/arch/powerpc/include/asm/fsl_guts.h @@ -4,7 +4,7 @@ * Authors: Jeff Brown * Timur Tabi * - * Copyright 2004,2007 Freescale Semiconductor, Inc + * Copyright 2004,2007,2012 Freescale Semiconductor, Inc * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -114,6 +114,10 @@ struct ccsr_guts_86xx { __be32 srds2cr1; /* 0x.0f44 - SerDes2 Control Register 0 */ } __attribute__ ((packed)); + +/* Alternate function signal multiplex control */ +#define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x)) + #ifdef CONFIG_PPC_86xx #define CCSR_GUTS_DMACR_DEV_SSI 0 /* DMA controller/channel set to SSI */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 57aceb5d273..f33662b46b8 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -1,5 +1,6 @@ /* - * Copyright (C) Freescale Semicondutor, Inc. 2006-2010. All rights reserved. + * Copyright (C) 2006-2010, 2012 Freescale Semicondutor, Inc. + * All rights reserved. * * Author: Andy Fleming * @@ -51,6 +52,7 @@ #include #include #include +#include #include "smp.h" #include "mpc85xx.h" @@ -268,34 +270,27 @@ static void __init mpc85xx_mds_qe_init(void) mpc85xx_mds_reset_ucc_phys(); if (machine_is(p1021_mds)) { -#define MPC85xx_PMUXCR_OFFSET 0x60 -#define MPC85xx_PMUXCR_QE0 0x00008000 -#define MPC85xx_PMUXCR_QE3 0x00001000 -#define MPC85xx_PMUXCR_QE9 0x00000040 -#define MPC85xx_PMUXCR_QE12 0x00000008 - static __be32 __iomem *pmuxcr; - np = of_find_node_by_name(NULL, "global-utilities"); + struct ccsr_guts_85xx __iomem *guts; + np = of_find_node_by_name(NULL, "global-utilities"); if (np) { - pmuxcr = of_iomap(np, 0) + MPC85xx_PMUXCR_OFFSET; - - if (!pmuxcr) - printk(KERN_EMERG "Error: Alternate function" - " signal multiplex control register not" - " mapped!\n"); - else + guts = of_iomap(np, 0); + if (!guts) + pr_err("mpc85xx-rdb: could not map global utilities register\n"); + else{ /* P1021 has pins muxed for QE and other functions. To * enable QE UEC mode, we need to set bit QE0 for UCC1 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 * and QE12 for QE MII management signals in PMUXCR * register. */ - setbits32(pmuxcr, MPC85xx_PMUXCR_QE0 | - MPC85xx_PMUXCR_QE3 | - MPC85xx_PMUXCR_QE9 | - MPC85xx_PMUXCR_QE12); - + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + } of_node_put(np); } -- cgit v1.2.3-70-g09d2 From 8626816e905e8e131663dfcae27d307df8c220d0 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Fri, 17 Feb 2012 10:49:03 +0800 Subject: powerpc: add support for MPIC message register API Some MPIC implementations contain one or more blocks of message registers that are used to send messages between cores via IPIs. A simple API has been added to access (get/put, read, write, etc ...) these message registers. The available message registers are initially discovered via nodes in the device tree. A separate commit contains a binding for the message register nodes. 
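A usage sketch may help to see how the pieces fit together (illustrative only, not code from this patch: the mpic_msgr_* calls are the ones declared in mpic_msgr.h below, while the register number, IRQ name and function names are made up):

#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <asm/mpic_msgr.h>

static struct mpic_msgr *example_msgr;

static irqreturn_t example_msgr_handler(int irq, void *data)
{
        /* Reading returns the 32-bit payload and clears the interrupt. */
        u32 payload = mpic_msgr_read(example_msgr);

        pr_info("received message %#x\n", payload);
        return IRQ_HANDLED;
}

static int example_msgr_init(void)
{
        int irq, rc;

        example_msgr = mpic_msgr_get(0);        /* claim message register 0 */
        if (IS_ERR(example_msgr))
                return PTR_ERR(example_msgr);   /* -EBUSY or -ENODEV */

        irq = mpic_msgr_get_irq(example_msgr);
        if (irq != NO_IRQ) {
                rc = request_irq(irq, example_msgr_handler, 0,
                                 "example-msgr", NULL);
                if (rc) {
                        mpic_msgr_put(example_msgr);
                        return rc;
                }
        }

        mpic_msgr_enable(example_msgr);
        return 0;
}

static void example_msgr_send(u32 payload)
{
        /* Writing an enabled register fires the interrupt wired to it. */
        mpic_msgr_write(example_msgr, payload);
}

The matching teardown path would free the IRQ and call mpic_msgr_put() to disable the register and mark it free again.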
Signed-off-by: Meador Inge Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/mpic_msgr.h | 132 ++++++++++++++++ arch/powerpc/platforms/Kconfig | 8 + arch/powerpc/sysdev/Makefile | 2 + arch/powerpc/sysdev/mpic_msgr.c | 282 +++++++++++++++++++++++++++++++++++ 4 files changed, 424 insertions(+) create mode 100644 arch/powerpc/include/asm/mpic_msgr.h create mode 100644 arch/powerpc/sysdev/mpic_msgr.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h new file mode 100644 index 00000000000..3ec37dc9003 --- /dev/null +++ b/arch/powerpc/include/asm/mpic_msgr.h @@ -0,0 +1,132 @@ +/* + * Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#ifndef _ASM_MPIC_MSGR_H +#define _ASM_MPIC_MSGR_H + +#include +#include + +struct mpic_msgr { + u32 __iomem *base; + u32 __iomem *mer; + int irq; + unsigned char in_use; + raw_spinlock_t lock; + int num; +}; + +/* Get a message register + * + * @reg_num: the MPIC message register to get + * + * A pointer to the message register is returned. If + * the message register asked for is already in use, then + * EBUSY is returned. If the number given is not associated + * with an actual message register, then ENODEV is returned. + * Successfully getting the register marks it as in use. + */ +extern struct mpic_msgr *mpic_msgr_get(unsigned int reg_num); + +/* Relinquish a message register + * + * @msgr: the message register to return + * + * Disables the given message register and marks it as free. + * After this call has completed successully the message + * register is available to be acquired by a call to + * mpic_msgr_get. + */ +extern void mpic_msgr_put(struct mpic_msgr *msgr); + +/* Enable a message register + * + * @msgr: the message register to enable + * + * The given message register is enabled for sending + * messages. + */ +extern void mpic_msgr_enable(struct mpic_msgr *msgr); + +/* Disable a message register + * + * @msgr: the message register to disable + * + * The given message register is disabled for sending + * messages. + */ +extern void mpic_msgr_disable(struct mpic_msgr *msgr); + +/* Write a message to a message register + * + * @msgr: the message register to write to + * @message: the message to write + * + * The given 32-bit message is written to the given message + * register. Writing to an enabled message registers fires + * an interrupt. + */ +static inline void mpic_msgr_write(struct mpic_msgr *msgr, u32 message) +{ + out_be32(msgr->base, message); +} + +/* Read a message from a message register + * + * @msgr: the message register to read from + * + * Returns the 32-bit value currently in the given message register. + * Upon reading the register any interrupts for that register are + * cleared. + */ +static inline u32 mpic_msgr_read(struct mpic_msgr *msgr) +{ + return in_be32(msgr->base); +} + +/* Clear a message register + * + * @msgr: the message register to clear + * + * Clears any interrupts associated with the given message register. 
+ */ +static inline void mpic_msgr_clear(struct mpic_msgr *msgr) +{ + (void) mpic_msgr_read(msgr); +} + +/* Set the destination CPU for the message register + * + * @msgr: the message register whose destination is to be set + * @cpu_num: the Linux CPU number to bind the message register to + * + * Note that the CPU number given is the CPU number used by the kernel + * and *not* the actual hardware CPU number. + */ +static inline void mpic_msgr_set_destination(struct mpic_msgr *msgr, + u32 cpu_num) +{ + out_be32(msgr->base, 1 << get_hard_smp_processor_id(cpu_num)); +} + +/* Get the IRQ number for the message register + * @msgr: the message register whose IRQ is to be returned + * + * Returns the IRQ number associated with the given message register. + * NO_IRQ is returned if this message register is not capable of + * receiving interrupts. What message register can and cannot receive + * interrupts is specified in the device tree for the system. + */ +static inline int mpic_msgr_get_irq(struct mpic_msgr *msgr) +{ + return msgr->irq; +} + +#endif diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 236ab67bc3a..a35ca44ade6 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -86,6 +86,14 @@ config MPIC_WEIRD bool default n +config MPIC_MSGR + bool "MPIC message register support" + depends on MPIC + default n + help + Enables support for the MPIC message registers. These + registers are used for inter-processor communication. + config PPC_I8259 bool default n diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f80ff9f5f44..1bd7ecb2462 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -4,6 +4,8 @@ ccflags-$(CONFIG_PPC64) := -mno-minimal-toc mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) +mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o +obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y) obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o fsl-msi-obj-$(CONFIG_PCI_MSI) += fsl_msi.o obj-$(CONFIG_PPC_MSI_BITMAP) += msi_bitmap.o diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c new file mode 100644 index 00000000000..6e7fa386e76 --- /dev/null +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -0,0 +1,282 @@ +/* + * Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation. + * + * Some ideas based on un-pushed work done by Vivek Mahajan, Jason Jin, and + * Mingkai Hu from Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MPIC_MSGR_REGISTERS_PER_BLOCK 4 +#define MPIC_MSGR_STRIDE 0x10 +#define MPIC_MSGR_MER_OFFSET 0x100 +#define MSGR_INUSE 0 +#define MSGR_FREE 1 + +static struct mpic_msgr **mpic_msgrs; +static unsigned int mpic_msgr_count; + +static inline void _mpic_msgr_mer_write(struct mpic_msgr *msgr, u32 value) +{ + out_be32(msgr->mer, value); +} + +static inline u32 _mpic_msgr_mer_read(struct mpic_msgr *msgr) +{ + return in_be32(msgr->mer); +} + +static inline void _mpic_msgr_disable(struct mpic_msgr *msgr) +{ + u32 mer = _mpic_msgr_mer_read(msgr); + + _mpic_msgr_mer_write(msgr, mer & ~(1 << msgr->num)); +} + +struct mpic_msgr *mpic_msgr_get(unsigned int reg_num) +{ + unsigned long flags; + struct mpic_msgr *msgr; + + /* Assume busy until proven otherwise. */ + msgr = ERR_PTR(-EBUSY); + + if (reg_num >= mpic_msgr_count) + return ERR_PTR(-ENODEV); + + raw_spin_lock_irqsave(&msgr->lock, flags); + if (mpic_msgrs[reg_num]->in_use == MSGR_FREE) { + msgr = mpic_msgrs[reg_num]; + msgr->in_use = MSGR_INUSE; + } + raw_spin_unlock_irqrestore(&msgr->lock, flags); + + return msgr; +} +EXPORT_SYMBOL_GPL(mpic_msgr_get); + +void mpic_msgr_put(struct mpic_msgr *msgr) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&msgr->lock, flags); + msgr->in_use = MSGR_FREE; + _mpic_msgr_disable(msgr); + raw_spin_unlock_irqrestore(&msgr->lock, flags); +} +EXPORT_SYMBOL_GPL(mpic_msgr_put); + +void mpic_msgr_enable(struct mpic_msgr *msgr) +{ + unsigned long flags; + u32 mer; + + raw_spin_lock_irqsave(&msgr->lock, flags); + mer = _mpic_msgr_mer_read(msgr); + _mpic_msgr_mer_write(msgr, mer | (1 << msgr->num)); + raw_spin_unlock_irqrestore(&msgr->lock, flags); +} +EXPORT_SYMBOL_GPL(mpic_msgr_enable); + +void mpic_msgr_disable(struct mpic_msgr *msgr) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&msgr->lock, flags); + _mpic_msgr_disable(msgr); + raw_spin_unlock_irqrestore(&msgr->lock, flags); +} +EXPORT_SYMBOL_GPL(mpic_msgr_disable); + +/* The following three functions are used to compute the order and number of + * the message register blocks. They are clearly very inefficent. However, + * they are called *only* a few times during device initialization. + */ +static unsigned int mpic_msgr_number_of_blocks(void) +{ + unsigned int count; + struct device_node *aliases; + + count = 0; + aliases = of_find_node_by_name(NULL, "aliases"); + + if (aliases) { + char buf[32]; + + for (;;) { + snprintf(buf, sizeof(buf), "mpic-msgr-block%d", count); + if (!of_find_property(aliases, buf, NULL)) + break; + + count += 1; + } + } + + return count; +} + +static unsigned int mpic_msgr_number_of_registers(void) +{ + return mpic_msgr_number_of_blocks() * MPIC_MSGR_REGISTERS_PER_BLOCK; +} + +static int mpic_msgr_block_number(struct device_node *node) +{ + struct device_node *aliases; + unsigned int index, number_of_blocks; + char buf[64]; + + number_of_blocks = mpic_msgr_number_of_blocks(); + aliases = of_find_node_by_name(NULL, "aliases"); + if (!aliases) + return -1; + + for (index = 0; index < number_of_blocks; ++index) { + struct property *prop; + + snprintf(buf, sizeof(buf), "mpic-msgr-block%d", index); + prop = of_find_property(aliases, buf, NULL); + if (node == of_find_node_by_path(prop->value)) + break; + } + + return index == number_of_blocks ? -1 : index; +} + +/* The probe function for a single message register block. 
+ */ +static __devinit int mpic_msgr_probe(struct platform_device *dev) +{ + void __iomem *msgr_block_addr; + int block_number; + struct resource rsrc; + unsigned int i; + unsigned int irq_index; + struct device_node *np = dev->dev.of_node; + unsigned int receive_mask; + const unsigned int *prop; + + if (!np) { + dev_err(&dev->dev, "Device OF-Node is NULL"); + return -EFAULT; + } + + /* Allocate the message register array upon the first device + * registered. + */ + if (!mpic_msgrs) { + mpic_msgr_count = mpic_msgr_number_of_registers(); + dev_info(&dev->dev, "Found %d message registers\n", + mpic_msgr_count); + + mpic_msgrs = kzalloc(sizeof(struct mpic_msgr) * mpic_msgr_count, + GFP_KERNEL); + if (!mpic_msgrs) { + dev_err(&dev->dev, + "No memory for message register blocks\n"); + return -ENOMEM; + } + } + dev_info(&dev->dev, "Of-device full name %s\n", np->full_name); + + /* IO map the message register block. */ + of_address_to_resource(np, 0, &rsrc); + msgr_block_addr = ioremap(rsrc.start, rsrc.end - rsrc.start); + if (!msgr_block_addr) { + dev_err(&dev->dev, "Failed to iomap MPIC message registers"); + return -EFAULT; + } + + /* Ensure the block has a defined order. */ + block_number = mpic_msgr_block_number(np); + if (block_number < 0) { + dev_err(&dev->dev, + "Failed to find message register block alias\n"); + return -ENODEV; + } + dev_info(&dev->dev, "Setting up message register block %d\n", + block_number); + + /* Grab the receive mask which specifies what registers can receive + * interrupts. + */ + prop = of_get_property(np, "mpic-msgr-receive-mask", NULL); + receive_mask = (prop) ? *prop : 0xF; + + /* Build up the appropriate message register data structures. */ + for (i = 0, irq_index = 0; i < MPIC_MSGR_REGISTERS_PER_BLOCK; ++i) { + struct mpic_msgr *msgr; + unsigned int reg_number; + + msgr = kzalloc(sizeof(struct mpic_msgr), GFP_KERNEL); + if (!msgr) { + dev_err(&dev->dev, "No memory for message register\n"); + return -ENOMEM; + } + + reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i; + msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE; + msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET; + msgr->in_use = MSGR_FREE; + msgr->num = i; + raw_spin_lock_init(&msgr->lock); + + if (receive_mask & (1 << i)) { + struct resource irq; + + if (of_irq_to_resource(np, irq_index, &irq) == NO_IRQ) { + dev_err(&dev->dev, + "Missing interrupt specifier"); + kfree(msgr); + return -EFAULT; + } + msgr->irq = irq.start; + irq_index += 1; + } else { + msgr->irq = NO_IRQ; + } + + mpic_msgrs[reg_number] = msgr; + mpic_msgr_disable(msgr); + dev_info(&dev->dev, "Register %d initialized: irq %d\n", + reg_number, msgr->irq); + + } + + return 0; +} + +static const struct of_device_id mpic_msgr_ids[] = { + { + .compatible = "fsl,mpic-v3.1-msgr", + .data = NULL, + }, + {} +}; + +static struct platform_driver mpic_msgr_driver = { + .driver = { + .name = "mpic-msgr", + .owner = THIS_MODULE, + .of_match_table = mpic_msgr_ids, + }, + .probe = mpic_msgr_probe, +}; + +static __init int mpic_msgr_init(void) +{ + return platform_driver_register(&mpic_msgr_driver); +} +subsys_initcall(mpic_msgr_init); -- cgit v1.2.3-70-g09d2 From f5339277eb8d3aed37f12a27988366f68ab68930 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 15 Mar 2012 18:18:00 +0000 Subject: powerpc: Remove FW_FEATURE ISERIES from arch code This is no longer selectable, so just remove all the dependent code. 
Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/abs_addr.h | 21 +------ arch/powerpc/include/asm/firmware.h | 9 --- arch/powerpc/include/asm/time.h | 15 ----- arch/powerpc/kernel/irq.c | 14 ----- arch/powerpc/kernel/isa-bridge.c | 3 - arch/powerpc/kernel/lparcfg.c | 108 +++------------------------------- arch/powerpc/kernel/paca.c | 12 +--- arch/powerpc/kernel/pci-common.c | 15 ----- arch/powerpc/kernel/sysfs.c | 7 +-- arch/powerpc/kernel/time.c | 108 +--------------------------------- arch/powerpc/lib/locks.c | 24 ++------ arch/powerpc/mm/hash_utils_64.c | 9 +-- arch/powerpc/mm/slb.c | 6 -- arch/powerpc/mm/stab.c | 9 --- arch/powerpc/oprofile/common.c | 3 - arch/powerpc/platforms/powernv/pci.c | 1 + arch/powerpc/platforms/pseries/lpar.c | 1 + arch/powerpc/xmon/xmon.c | 28 --------- 18 files changed, 26 insertions(+), 367 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 5ab0b71531b..9d92ba04b03 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -17,7 +17,6 @@ #include #include #include -#include struct mschunks_map { unsigned long num_chunks; @@ -46,30 +45,12 @@ static inline unsigned long addr_to_chunk(unsigned long addr) static inline unsigned long phys_to_abs(unsigned long pa) { - unsigned long chunk; - - /* This is a no-op on non-iSeries */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - return pa; - - chunk = addr_to_chunk(pa); - - if (chunk < mschunks_map.num_chunks) - chunk = mschunks_map.mapping[chunk]; - - return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); + return pa; } /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) #define abs_to_virt(aa) __va(aa) -/* - * Converts Virtual Address to Real Address for - * Legacy iSeries Hypervisor calls - */ -#define iseries_hv_addr(virtaddr) \ - (0x8000000000000000UL | virt_to_abs(virtaddr)) - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ABS_ADDR_H */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 14db29b18d0..ad0b751b0d7 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -41,7 +41,6 @@ #define FW_FEATURE_XDABR ASM_CONST(0x0000000000040000) #define FW_FEATURE_MULTITCE ASM_CONST(0x0000000000080000) #define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000) -#define FW_FEATURE_ISERIES ASM_CONST(0x0000000000200000) #define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000) #define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) @@ -65,8 +64,6 @@ enum { FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, - FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, @@ -79,9 +76,6 @@ enum { #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | #endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_POSSIBLE | -#endif #ifdef CONFIG_PPC_POWERNV FW_FEATURE_POWERNV_POSSIBLE | #endif @@ -99,9 +93,6 @@ enum { #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_ALWAYS & #endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_ALWAYS & -#endif #ifdef CONFIG_PPC_POWERNV 
FW_FEATURE_POWERNV_ALWAYS & #endif diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 7eb10fb96cd..2136f58a54e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -18,11 +18,6 @@ #include #include -#ifdef CONFIG_PPC_ISERIES -#include -#include -#include -#endif /* time.c */ extern unsigned long tb_ticks_per_jiffy; @@ -166,15 +161,6 @@ static inline void set_dec(int val) #else #ifndef CONFIG_BOOKE --val; -#endif -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES) && - get_lppaca()->shared_proc) { - get_lppaca()->virtual_decr = val; - if (get_dec() > val) - HvCall_setVirtualDecr(); - return; - } #endif mtspr(SPRN_DEC, val); #endif /* not 40x or 8xx_CPU6 */ @@ -217,7 +203,6 @@ DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif extern void secondary_cpu_time_init(void); -extern void iSeries_time_init_early(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index eb804e15b29..45b367c8d8b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -211,11 +211,6 @@ notrace void arch_local_irq_restore(unsigned long en) * External interrupt events on non-iseries will have caused * interrupts to be hard-disabled, so there is no problem, we * cannot have preempted. - * - * That leaves us with EEs on iSeries or decrementer interrupts, - * which I decided to safely ignore. The preemption would have - * itself been the result of an interrupt, upon which return we - * will have checked for pending events on the old CPU. */ irq_happened = get_irq_happened(); if (!irq_happened) @@ -458,15 +453,6 @@ void do_IRQ(struct pt_regs *regs) irq_exit(); set_irq_regs(old_regs); -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES) && - get_lppaca()->int_dword.fields.decr_int) { - get_lppaca()->int_dword.fields.decr_int = 0; - /* Signal a fake decrementer interrupt */ - timer_interrupt(regs); - } -#endif - trace_irq_exit(regs); } diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 479752901ec..d45ec58703c 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -29,7 +29,6 @@ #include #include #include -#include unsigned long isa_io_base; /* NULL if no ISA bus */ EXPORT_SYMBOL(isa_io_base); @@ -261,8 +260,6 @@ static struct notifier_block isa_bridge_notifier = { */ static int __init isa_bridge_init(void) { - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return 0; bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); return 0; } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 578f35f1872..ac12bd80ad9 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -55,80 +54,14 @@ static unsigned long get_purr(void) int cpu; for_each_possible_cpu(cpu) { - if (firmware_has_feature(FW_FEATURE_ISERIES)) - sum_purr += lppaca_of(cpu).emulated_time_base; - else { - struct cpu_usage *cu; + struct cpu_usage *cu; - cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; - } + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; } return sum_purr; } -#ifdef CONFIG_PPC_ISERIES - -/* - * Methods used to fetch LPAR data when running on an iSeries platform. 
- */ -static int iseries_lparcfg_data(struct seq_file *m, void *v) -{ - unsigned long pool_id; - int shared, entitled_capacity, max_entitled_capacity; - int processors, max_processors; - unsigned long purr = get_purr(); - - shared = (int)(local_paca->lppaca_ptr->shared_proc); - - seq_printf(m, "system_active_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - seq_printf(m, "system_potential_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - processors = (int)HvLpConfig_getPhysicalProcessors(); - seq_printf(m, "partition_active_processors=%d\n", processors); - - max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); - seq_printf(m, "partition_potential_processors=%d\n", max_processors); - - if (shared) { - entitled_capacity = HvLpConfig_getSharedProcUnits(); - max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); - } else { - entitled_capacity = processors * 100; - max_entitled_capacity = max_processors * 100; - } - seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - max_entitled_capacity); - - if (shared) { - pool_id = HvLpConfig_getSharedPoolIndex(); - seq_printf(m, "pool=%d\n", (int)pool_id); - seq_printf(m, "pool_capacity=%d\n", - (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * - 100)); - seq_printf(m, "purr=%ld\n", purr); - } - - seq_printf(m, "shared_processor_mode=%d\n", shared); - - return 0; -} - -#else /* CONFIG_PPC_ISERIES */ - -static int iseries_lparcfg_data(struct seq_file *m, void *v) -{ - return 0; -} - -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_PSERIES /* * Methods used to fetch LPAR data when running on a pSeries platform. */ @@ -648,8 +581,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, u8 new_weight, *new_weight_ptr = &new_weight; ssize_t retval; - if (!firmware_has_feature(FW_FEATURE_SPLPAR) || - firmware_has_feature(FW_FEATURE_ISERIES)) + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return -EINVAL; if (count > kbuf_sz) @@ -709,21 +641,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, return retval; } -#else /* CONFIG_PPC_PSERIES */ - -static int pseries_lparcfg_data(struct seq_file *m, void *v) -{ - return 0; -} - -static ssize_t lparcfg_write(struct file *file, const char __user * buf, - size_t count, loff_t * off) -{ - return -EINVAL; -} - -#endif /* CONFIG_PPC_PSERIES */ - static int lparcfg_data(struct seq_file *m, void *v) { struct device_node *rootdn; @@ -738,19 +655,11 @@ static int lparcfg_data(struct seq_file *m, void *v) rootdn = of_find_node_by_path("/"); if (rootdn) { tmp = of_get_property(rootdn, "model", NULL); - if (tmp) { + if (tmp) model = tmp; - /* Skip "IBM," - see platforms/iseries/dt.c */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - model += 4; - } tmp = of_get_property(rootdn, "system-id", NULL); - if (tmp) { + if (tmp) system_id = tmp; - /* Skip "IBM," - see platforms/iseries/dt.c */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - system_id += 4; - } lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", NULL); if (lp_index_ptr) @@ -761,8 +670,6 @@ static int lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "system_type=%s\n", model); seq_printf(m, "partition_id=%d\n", (int)lp_index); - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return iseries_lparcfg_data(m, v); return pseries_lparcfg_data(m, v); } @@ -786,8 +693,7 @@ static int __init lparcfg_init(void) umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; /* Allow writing if we 
have FW_FEATURE_SPLPAR */ - if (firmware_has_feature(FW_FEATURE_SPLPAR) && - !firmware_has_feature(FW_FEATURE_ISERIES)) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) mode |= S_IWUSR; ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 41456ff55e1..0bb1f98613b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -11,13 +11,10 @@ #include #include -#include #include #include #include #include -#include -#include #include /* This symbol is provided by the linker - let it fill in the paca @@ -30,8 +27,8 @@ extern unsigned long __toc_start; * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call * is now known to fail if the lppaca structure crosses a page - * boundary. The lppaca is also used on legacy iSeries and POWER5 - * pSeries boxes. The lppaca is 640 bytes long, and cannot readily + * boundary. The lppaca is also used on POWER5 pSeries boxes. + * The lppaca is 640 bytes long, and cannot readily * change since the hypervisor knows its layout, so a 1kB alignment * will suffice to ensure that it doesn't cross a page boundary. */ @@ -183,12 +180,9 @@ void __init allocate_pacas(void) /* * We can't take SLB misses on the paca, and we want to access them * in real mode, so allocate them within the RMA and also within - * the first segment. On iSeries they must be within the area mapped - * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + * the first segment. */ limit = min(0x10000000ULL, ppc64_rma_size); - if (firmware_has_feature(FW_FEATURE_ISERIES)) - limit = min(limit, HvPagesToMap * HVPAGESIZE); paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cce98d76e90..d0373bcb7c9 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -38,7 +38,6 @@ #include #include #include -#include #include static DEFINE_SPINLOCK(hose_spinlock); @@ -219,20 +218,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) struct of_irq oirq; unsigned int virq; - /* The current device-tree that iSeries generates from the HV - * PCI informations doesn't contain proper interrupt routing, - * and all the fallback would do is print out crap, so we - * don't attempt to resolve the interrupts here at all, some - * iSeries specific fixup does it. - * - * In the long run, we will hopefully fix the generated device-tree - * instead. 
- */ -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return -1; -#endif - pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); #ifdef DEBUG diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 883e74c0d1b..0c683d376b1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -341,8 +340,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) int i, nattrs; #ifdef CONFIG_PPC64 - if (!firmware_has_feature(FW_FEATURE_ISERIES) && - cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_SMT)) device_create_file(s, &dev_attr_smt_snooze_delay); #endif @@ -414,8 +412,7 @@ static void unregister_cpu_online(unsigned int cpu) BUG_ON(!c->hotpluggable); #ifdef CONFIG_PPC64 - if (!firmware_has_feature(FW_FEATURE_ISERIES) && - cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_SMT)) device_remove_file(s, &dev_attr_smt_snooze_delay); #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index f81c81b92f0..2c42cd72d0f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -17,8 +17,7 @@ * * TODO (not necessarily in this file): * - improve precision and reproducibility of timebase frequency - * measurement at boot time. (for iSeries, we calibrate the timebase - * against the Titan chip's clock.) + * measurement at boot time. * - for astronomical applications: add a new function to get * non ambiguous timestamps even around leap seconds. This needs * a new timestamp format and a good name. @@ -70,10 +69,6 @@ #include #include #include -#ifdef CONFIG_PPC_ISERIES -#include -#include -#endif /* powerpc clocksource/clockevent code */ @@ -117,14 +112,6 @@ static struct clock_event_device decrementer_clockevent = { DEFINE_PER_CPU(u64, decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); -#ifdef CONFIG_PPC_ISERIES -static unsigned long __initdata iSeries_recal_titan; -static signed long __initdata iSeries_recal_tb; - -/* Forward declaration is only needed for iSereis compiles */ -static void __init clocksource_init(void); -#endif - #define XSEC_PER_SEC (1024*1024) #ifdef CONFIG_PPC64 @@ -423,74 +410,6 @@ unsigned long profile_pc(struct pt_regs *regs) EXPORT_SYMBOL(profile_pc); #endif -#ifdef CONFIG_PPC_ISERIES - -/* - * This function recalibrates the timebase based on the 49-bit time-of-day - * value in the Titan chip. The Titan is much more accurate than the value - * returned by the service processor for the timebase frequency. 
- */ - -static int __init iSeries_tb_recal(void) -{ - unsigned long titan, tb; - - /* Make sure we only run on iSeries */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - return -ENODEV; - - tb = get_tb(); - titan = HvCallXm_loadTod(); - if ( iSeries_recal_titan ) { - unsigned long tb_ticks = tb - iSeries_recal_tb; - unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; - unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; - unsigned long new_tb_ticks_per_jiffy = - DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ); - long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; - char sign = '+'; - /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ - new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ; - - if ( tick_diff < 0 ) { - tick_diff = -tick_diff; - sign = '-'; - } - if ( tick_diff ) { - if ( tick_diff < tb_ticks_per_jiffy/25 ) { - printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n", - new_tb_ticks_per_jiffy, sign, tick_diff ); - tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; - tb_ticks_per_sec = new_tb_ticks_per_sec; - calc_cputime_factors(); - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; - setup_cputime_one_jiffy(); - } - else { - printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" - " new tb_ticks_per_jiffy = %lu\n" - " old tb_ticks_per_jiffy = %lu\n", - new_tb_ticks_per_jiffy, tb_ticks_per_jiffy ); - } - } - } - iSeries_recal_titan = titan; - iSeries_recal_tb = tb; - - /* Called here as now we know accurate values for the timebase */ - clocksource_init(); - return 0; -} -late_initcall(iSeries_tb_recal); - -/* Called from platform early init */ -void __init iSeries_time_init_early(void) -{ - iSeries_recal_tb = get_tb(); - iSeries_recal_titan = HvCallXm_loadTod(); -} -#endif /* CONFIG_PPC_ISERIES */ - #ifdef CONFIG_IRQ_WORK /* @@ -546,16 +465,6 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -/* - * For iSeries shared processors, we have to let the hypervisor - * set the hardware decrementer. We set a virtual decrementer - * in the lppaca and call the hypervisor if the virtual - * decrementer is less than the current value in the hardware - * decrementer. (almost always the new decrementer value will - * be greater than the current hardware decementer so the hypervisor - * call will not be needed) - */ - /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -599,20 +508,10 @@ void timer_interrupt(struct pt_regs * regs) irq_work_run(); } -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) - get_lppaca()->int_dword.fields.decr_int = 0; -#endif - *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) - process_hvlpevents(); -#endif - #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { @@ -984,9 +883,8 @@ void __init time_init(void) */ start_cpu_decrementer(); - /* Register the clocksource, if we're not running on iSeries */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - clocksource_init(); + /* Register the clocksource */ + clocksource_init(); init_decrementer_clockevent(); } diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index a6ebba56fdd..bb7cfecf278 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -19,11 +19,9 @@ #include /* waiting for a spinlock... 
*/ -#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) +#if defined(CONFIG_PPC_SPLPAR) #include -#include #include -#include void __spin_yield(arch_spinlock_t *lock) { @@ -40,14 +38,8 @@ void __spin_yield(arch_spinlock_t *lock) rmb(); if (lock->slock != lock_value) return; /* something has changed */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, - ((u64)holder_cpu << 32) | yield_count); -#ifdef CONFIG_PPC_SPLPAR - else - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); -#endif + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); } /* @@ -71,14 +63,8 @@ void __rw_yield(arch_rwlock_t *rw) rmb(); if (rw->lock != lock_value) return; /* something has changed */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, - ((u64)holder_cpu << 32) | yield_count); -#ifdef CONFIG_PPC_SPLPAR - else - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); -#endif + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); } #endif diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b534bbac3f8..3e8c37a4e39 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -56,6 +56,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -756,12 +757,9 @@ void __init early_init_mmu(void) */ htab_initialize(); - /* Initialize stab / SLB management except on iSeries - */ + /* Initialize stab / SLB management */ if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); - else if (!firmware_has_feature(FW_FEATURE_ISERIES)) - stab_initialize(get_paca()->stab_real); } #ifdef CONFIG_SMP @@ -772,8 +770,7 @@ void __cpuinit early_init_mmu_secondary(void) mtspr(SPRN_SDR1, _SDR1); /* Initialize STAB/SLB. We use a virtual address as it works - * in real mode on pSeries and we want a virtual address on - * iSeries anyway + * in real mode on pSeries. 
*/ if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index e22276cb67a..a538c80db2d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -307,11 +306,6 @@ void slb_initialize(void) get_paca()->stab_rr = SLB_NUM_BOLTED; - /* On iSeries the bolted entries have already been set up by - * the hypervisor from the lparMap data in head.S */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return; - lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | vmalloc_llp; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 41e31642a86..9106ebb118f 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include struct stab_entry { unsigned long esid_data; @@ -285,12 +283,5 @@ void stab_initialize(unsigned long stab) /* Set ASR */ stabreal = get_paca()->stab_real | 0x1ul; -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) { - HvCall1(HvCallBaseSetASR, stabreal); - return; - } -#endif /* CONFIG_PPC_ISERIES */ - mtspr(SPRN_ASR, stabreal); } diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index d65e68f3cb2..6f01624f317 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -195,9 +195,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) if (!cur_cpu_spec->oprofile_cpu_type) return -ENODEV; - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return -ENODEV; - switch (cur_cpu_spec->oprofile_type) { #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_OPROFILE_CELL diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f92b9ef7340..214478d781a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "powernv.h" #include "pci.h" diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 7bc73af6c7b..5f3ef876ded 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "plpar_wrappers.h" #include "pseries.h" diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 974a47b3c9b..68a9cbbab45 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -1635,25 +1634,6 @@ static void super_regs(void) mfspr(SPRN_DEC), mfspr(SPRN_SPRG2)); printf("sp = "REG" sprg3= "REG"\n", sp, mfspr(SPRN_SPRG3)); printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR)); -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) { - struct paca_struct *ptrPaca; - struct lppaca *ptrLpPaca; - - /* Dump out relevant Paca data areas. 
*/ - printf("Paca: \n"); - ptrPaca = local_paca; - - printf(" Local Processor Control Area (LpPaca): \n"); - ptrLpPaca = ptrPaca->lppaca_ptr; - printf(" Saved Srr0=%.16lx Saved Srr1=%.16lx \n", - ptrLpPaca->saved_srr0, ptrLpPaca->saved_srr1); - printf(" Saved Gpr3=%.16lx Saved Gpr4=%.16lx \n", - ptrLpPaca->saved_gpr3, ptrLpPaca->saved_gpr4); - printf(" Saved Gpr5=%.16lx \n", - ptrLpPaca->gpr5_dword.saved_gpr5); - } -#endif return; } @@ -2856,10 +2836,6 @@ static void dump_tlb_book3e(void) static void xmon_init(int enable) { -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return; -#endif if (enable) { __debugger = xmon; __debugger_ipi = xmon_ipi; @@ -2896,10 +2872,6 @@ static struct sysrq_key_op sysrq_xmon_op = { static int __init setup_xmon_sysrq(void) { -#ifdef CONFIG_PPC_ISERIES - if (firmware_has_feature(FW_FEATURE_ISERIES)) - return 0; -#endif register_sysrq_key('x', &sysrq_xmon_op); return 0; } -- cgit v1.2.3-70-g09d2 From 1b041885ae1d9938440fc2cf6a444b70ec0a86c9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 15 Mar 2012 18:20:13 +0000 Subject: powerpc: Remove the remaining CONFIG_PPC_ISERIES pieces Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/boot/Makefile | 9 +-------- arch/powerpc/include/asm/dma.h | 4 ---- arch/powerpc/include/asm/lppaca.h | 8 -------- arch/powerpc/include/asm/spinlock.h | 5 ++--- arch/powerpc/kernel/asm-offsets.c | 14 -------------- 5 files changed, 3 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index f6622e02236..e8461cb18d0 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -184,7 +184,6 @@ image-$(CONFIG_PPC_EFIKA) += zImage.chrp image-$(CONFIG_PPC_PMAC) += zImage.pmac image-$(CONFIG_PPC_HOLLY) += dtbImage.holly image-$(CONFIG_PPC_PRPMC2800) += dtbImage.prpmc2800 -image-$(CONFIG_PPC_ISERIES) += zImage.iseries image-$(CONFIG_DEFAULT_UIMAGE) += uImage image-$(CONFIG_EPAPR_BOOT) += zImage.epapr @@ -311,12 +310,6 @@ $(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb $(obj)/vmlinux.strip: vmlinux $(STRIP) -s -R .comment $< -o $@ -# The iseries hypervisor won't take an ET_DYN executable, so this -# changes the type (byte 17) in the file to ET_EXEC (2). -$(obj)/zImage.iseries: vmlinux - $(STRIP) -s -R .comment $< -o $@ - printf "\x02" | dd of=$@ conv=notrunc bs=1 seek=17 - $(obj)/uImage: vmlinux $(wrapperbits) $(call if_changed,wrap,uboot) @@ -364,7 +357,7 @@ install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ - zImage.iseries zImage.miboot zImage.pmac zImage.pseries \ + zImage.miboot zImage.pmac zImage.pseries \ zImage.maple simpleImage.* otheros.bld *.dtb # clean up files cached by wrapper diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h index a7e06e25c70..adadb994361 100644 --- a/arch/powerpc/include/asm/dma.h +++ b/arch/powerpc/include/asm/dma.h @@ -34,8 +34,6 @@ /* Doesn't really apply... 
*/ #define MAX_DMA_ADDRESS (~0UL) -#if !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) - #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER #define dma_outb outb_p #else @@ -354,7 +352,5 @@ extern int isa_dma_bridge_buggy; #define isa_dma_bridge_buggy (0) #endif -#endif /* !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_DMA_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index e0298d26ce5..a76254af0aa 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -41,15 +41,7 @@ * We only have to have statically allocated lppaca structs on * legacy iSeries, which supports at most 64 cpus. */ -#ifdef CONFIG_PPC_ISERIES -#if NR_CPUS < 64 -#define NR_LPPACAS NR_CPUS -#else -#define NR_LPPACAS 64 -#endif -#else /* not iSeries */ #define NR_LPPACAS 1 -#endif /* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index f9611bd69ed..7124fc06ad4 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -23,7 +23,6 @@ #ifdef CONFIG_PPC64 #include #include -#include #endif #include #include @@ -95,12 +94,12 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * value. */ -#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) +#if defined(CONFIG_PPC_SPLPAR) /* We only yield to the hypervisor if we are in shared processor mode */ #define SHARED_PROCESSOR (get_lppaca()->shared_proc) extern void __spin_yield(arch_spinlock_t *lock); extern void __rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR || ISERIES */ +#else /* SPLPAR */ #define __spin_yield(x) barrier() #define __rw_yield(x) barrier() #define SHARED_PROCESSOR 0 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cdd0d264415..cc492e48ddf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -46,9 +46,6 @@ #include #include #endif -#ifdef CONFIG_PPC_ISERIES -#include -#endif #ifdef CONFIG_PPC_POWERNV #include #endif @@ -384,17 +381,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef CONFIG_PPC_ISERIES - /* the assembler miscalculates the VSID values */ - DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET)); - DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET)); - DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START)); - DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START)); - - /* alpaca */ - DEFINE(ALPACA_SIZE, sizeof(struct alpaca)); -#endif - DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); DEFINE(PTE_SIZE, sizeof(pte_t)); -- cgit v1.2.3-70-g09d2 From a6626ffe09d379bad4d0e49885f9195946ac875d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 15 Mar 2012 18:22:08 +0000 Subject: powerpc: Remove the rest of the legacy iSeries include files since they are not referenced any more. 
Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iseries/alpaca.h | 31 ------ arch/powerpc/include/asm/iseries/hv_call.h | 111 -------------------- arch/powerpc/include/asm/iseries/hv_call_sc.h | 50 --------- arch/powerpc/include/asm/iseries/hv_call_xm.h | 61 ----------- arch/powerpc/include/asm/iseries/hv_lp_config.h | 128 ------------------------ arch/powerpc/include/asm/iseries/hv_types.h | 112 --------------------- arch/powerpc/include/asm/iseries/it_lp_queue.h | 78 --------------- arch/powerpc/include/asm/iseries/lpar_map.h | 85 ---------------- arch/powerpc/include/asm/iseries/mf.h | 51 ---------- 9 files changed, 707 deletions(-) delete mode 100644 arch/powerpc/include/asm/iseries/alpaca.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_call.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_call_sc.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_call_xm.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_lp_config.h delete mode 100644 arch/powerpc/include/asm/iseries/hv_types.h delete mode 100644 arch/powerpc/include/asm/iseries/it_lp_queue.h delete mode 100644 arch/powerpc/include/asm/iseries/lpar_map.h delete mode 100644 arch/powerpc/include/asm/iseries/mf.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/iseries/alpaca.h b/arch/powerpc/include/asm/iseries/alpaca.h deleted file mode 100644 index c0cce6727a6..00000000000 --- a/arch/powerpc/include/asm/iseries/alpaca.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright © 2008 Stephen Rothwell IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_ALPACA_H -#define _ASM_POWERPC_ISERIES_ALPACA_H - -/* - * This is the part of the paca that the iSeries hypervisor - * needs to be statically initialised. Immediately after boot - * we switch to the normal Linux paca. - */ -struct alpaca { - struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */ - const void *reg_save_ptr; /* Pointer to LpRegSave for PLIC */ -}; - -#endif /* _ASM_POWERPC_ISERIES_ALPACA_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_call.h b/arch/powerpc/include/asm/iseries/hv_call.h deleted file mode 100644 index 162d653ad51..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_call.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This file contains the "hypervisor call" interface which is used to - * drive the hypervisor from the OS. - */ -#ifndef _ASM_POWERPC_ISERIES_HV_CALL_H -#define _ASM_POWERPC_ISERIES_HV_CALL_H - -#include -#include -#include - -/* Type of yield for HvCallBaseYieldProcessor */ -#define HvCall_YieldTimed 0 /* Yield until specified time (tb) */ -#define HvCall_YieldToActive 1 /* Yield until all active procs have run */ -#define HvCall_YieldToProc 2 /* Yield until the specified processor has run */ - -/* interrupt masks for setEnabledInterrupts */ -#define HvCall_MaskIPI 0x00000001 -#define HvCall_MaskLpEvent 0x00000002 -#define HvCall_MaskLpProd 0x00000004 -#define HvCall_MaskTimeout 0x00000008 - -/* Log buffer formats */ -#define HvCall_LogBuffer_ASCII 0 -#define HvCall_LogBuffer_EBCDIC 1 - -#define HvCallBaseAckDeferredInts HvCallBase + 0 -#define HvCallBaseCpmPowerOff HvCallBase + 1 -#define HvCallBaseGetHwPatch HvCallBase + 2 -#define HvCallBaseReIplSpAttn HvCallBase + 3 -#define HvCallBaseSetASR HvCallBase + 4 -#define HvCallBaseSetASRAndRfi HvCallBase + 5 -#define HvCallBaseSetIMR HvCallBase + 6 -#define HvCallBaseSendIPI HvCallBase + 7 -#define HvCallBaseTerminateMachine HvCallBase + 8 -#define HvCallBaseTerminateMachineSrc HvCallBase + 9 -#define HvCallBaseProcessPlicInterrupts HvCallBase + 10 -#define HvCallBaseIsPrimaryCpmOrMsdIpl HvCallBase + 11 -#define HvCallBaseSetVirtualSIT HvCallBase + 12 -#define HvCallBaseVaryOffThisProcessor HvCallBase + 13 -#define HvCallBaseVaryOffMemoryChunk HvCallBase + 14 -#define HvCallBaseVaryOffInteractivePercentage HvCallBase + 15 -#define HvCallBaseSendLpProd HvCallBase + 16 -#define HvCallBaseSetEnabledInterrupts HvCallBase + 17 -#define HvCallBaseYieldProcessor HvCallBase + 18 -#define HvCallBaseVaryOffSharedProcUnits HvCallBase + 19 -#define HvCallBaseSetVirtualDecr HvCallBase + 20 -#define HvCallBaseClearLogBuffer HvCallBase + 21 -#define HvCallBaseGetLogBufferCodePage HvCallBase + 22 -#define HvCallBaseGetLogBufferFormat HvCallBase + 23 -#define HvCallBaseGetLogBufferLength HvCallBase + 24 -#define HvCallBaseReadLogBuffer HvCallBase + 25 -#define HvCallBaseSetLogBufferFormatAndCodePage HvCallBase + 26 -#define HvCallBaseWriteLogBuffer HvCallBase + 27 -#define HvCallBaseRouter28 HvCallBase + 28 -#define HvCallBaseRouter29 HvCallBase + 29 -#define HvCallBaseRouter30 HvCallBase + 30 -#define HvCallBaseSetDebugBus HvCallBase + 31 - -#define HvCallCcSetDABR HvCallCc + 7 - -static inline void HvCall_setVirtualDecr(void) -{ - /* - * Ignore any error return codes - most likely means that the - * target value for the LP has been increased and this vary off - * would bring us below the new target. 
- */ - HvCall0(HvCallBaseSetVirtualDecr); -} - -static inline void HvCall_yieldProcessor(unsigned typeOfYield, u64 yieldParm) -{ - HvCall2(HvCallBaseYieldProcessor, typeOfYield, yieldParm); -} - -static inline void HvCall_setEnabledInterrupts(u64 enabledInterrupts) -{ - HvCall1(HvCallBaseSetEnabledInterrupts, enabledInterrupts); -} - -static inline void HvCall_setLogBufferFormatAndCodepage(int format, - u32 codePage) -{ - HvCall2(HvCallBaseSetLogBufferFormatAndCodePage, format, codePage); -} - -extern void HvCall_writeLogBuffer(const void *buffer, u64 bufLen); - -static inline void HvCall_sendIPI(struct paca_struct *targetPaca) -{ - HvCall1(HvCallBaseSendIPI, targetPaca->paca_index); -} - -#endif /* _ASM_POWERPC_ISERIES_HV_CALL_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_call_sc.h b/arch/powerpc/include/asm/iseries/hv_call_sc.h deleted file mode 100644 index f5d21095925..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_call_sc.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_HV_CALL_SC_H -#define _ASM_POWERPC_ISERIES_HV_CALL_SC_H - -#include - -#define HvCallBase 0x8000000000000000ul -#define HvCallCc 0x8001000000000000ul -#define HvCallCfg 0x8002000000000000ul -#define HvCallEvent 0x8003000000000000ul -#define HvCallHpt 0x8004000000000000ul -#define HvCallPci 0x8005000000000000ul -#define HvCallSm 0x8007000000000000ul -#define HvCallXm 0x8009000000000000ul - -extern u64 HvCall0(u64); -extern u64 HvCall1(u64, u64); -extern u64 HvCall2(u64, u64, u64); -extern u64 HvCall3(u64, u64, u64, u64); -extern u64 HvCall4(u64, u64, u64, u64, u64); -extern u64 HvCall5(u64, u64, u64, u64, u64, u64); -extern u64 HvCall6(u64, u64, u64, u64, u64, u64, u64); -extern u64 HvCall7(u64, u64, u64, u64, u64, u64, u64, u64); - -extern u64 HvCall0Ret16(u64, void *); -extern u64 HvCall1Ret16(u64, void *, u64); -extern u64 HvCall2Ret16(u64, void *, u64, u64); -extern u64 HvCall3Ret16(u64, void *, u64, u64, u64); -extern u64 HvCall4Ret16(u64, void *, u64, u64, u64, u64); -extern u64 HvCall5Ret16(u64, void *, u64, u64, u64, u64, u64); -extern u64 HvCall6Ret16(u64, void *, u64, u64, u64, u64, u64, u64); -extern u64 HvCall7Ret16(u64, void *, u64, u64 ,u64 ,u64 ,u64 ,u64 ,u64); - -#endif /* _ASM_POWERPC_ISERIES_HV_CALL_SC_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_call_xm.h b/arch/powerpc/include/asm/iseries/hv_call_xm.h deleted file mode 100644 index 392ac3f54df..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_call_xm.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This file contains the "hypervisor call" interface which is used to - * drive the hypervisor from SLIC. 
- */ -#ifndef _ASM_POWERPC_ISERIES_HV_CALL_XM_H -#define _ASM_POWERPC_ISERIES_HV_CALL_XM_H - -#include -#include - -#define HvCallXmGetTceTableParms HvCallXm + 0 -#define HvCallXmTestBus HvCallXm + 1 -#define HvCallXmConnectBusUnit HvCallXm + 2 -#define HvCallXmLoadTod HvCallXm + 8 -#define HvCallXmTestBusUnit HvCallXm + 9 -#define HvCallXmSetTce HvCallXm + 11 -#define HvCallXmSetTces HvCallXm + 13 - -static inline void HvCallXm_getTceTableParms(u64 cb) -{ - HvCall1(HvCallXmGetTceTableParms, cb); -} - -static inline u64 HvCallXm_setTce(u64 tceTableToken, u64 tceOffset, u64 tce) -{ - return HvCall3(HvCallXmSetTce, tceTableToken, tceOffset, tce); -} - -static inline u64 HvCallXm_setTces(u64 tceTableToken, u64 tceOffset, - u64 numTces, u64 tce1, u64 tce2, u64 tce3, u64 tce4) -{ - return HvCall7(HvCallXmSetTces, tceTableToken, tceOffset, numTces, - tce1, tce2, tce3, tce4); -} - -static inline u64 HvCallXm_testBus(u16 busNumber) -{ - return HvCall1(HvCallXmTestBus, busNumber); -} - -static inline u64 HvCallXm_testBusUnit(u16 busNumber, u8 subBusNumber, - u8 deviceId) -{ - return HvCall2(HvCallXmTestBusUnit, busNumber, - (subBusNumber << 8) | deviceId); -} - -static inline u64 HvCallXm_connectBusUnit(u16 busNumber, u8 subBusNumber, - u8 deviceId, u64 interruptToken) -{ - return HvCall5(HvCallXmConnectBusUnit, busNumber, - (subBusNumber << 8) | deviceId, interruptToken, 0, - 0 /* HvLpConfig::mapDsaToQueueIndex(HvLpDSA(busNumber, xBoard, xCard)) */); -} - -static inline u64 HvCallXm_loadTod(void) -{ - return HvCall0(HvCallXmLoadTod); -} - -#endif /* _ASM_POWERPC_ISERIES_HV_CALL_XM_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_lp_config.h b/arch/powerpc/include/asm/iseries/hv_lp_config.h deleted file mode 100644 index a006fd1e4a2..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_lp_config.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_HV_LP_CONFIG_H -#define _ASM_POWERPC_ISERIES_HV_LP_CONFIG_H - -/* - * This file contains the interface to the LPAR configuration data - * to determine which resources should be allocated to each partition. 
- */ - -#include -#include - -enum { - HvCallCfg_Cur = 0, - HvCallCfg_Init = 1, - HvCallCfg_Max = 2, - HvCallCfg_Min = 3 -}; - -#define HvCallCfgGetSystemPhysicalProcessors HvCallCfg + 6 -#define HvCallCfgGetPhysicalProcessors HvCallCfg + 7 -#define HvCallCfgGetMsChunks HvCallCfg + 9 -#define HvCallCfgGetSharedPoolIndex HvCallCfg + 20 -#define HvCallCfgGetSharedProcUnits HvCallCfg + 21 -#define HvCallCfgGetNumProcsInSharedPool HvCallCfg + 22 -#define HvCallCfgGetVirtualLanIndexMap HvCallCfg + 30 -#define HvCallCfgGetHostingLpIndex HvCallCfg + 32 - -extern HvLpIndex HvLpConfig_getLpIndex_outline(void); -extern HvLpIndex HvLpConfig_getLpIndex(void); -extern HvLpIndex HvLpConfig_getPrimaryLpIndex(void); - -static inline u64 HvLpConfig_getMsChunks(void) -{ - return HvCall2(HvCallCfgGetMsChunks, HvLpConfig_getLpIndex(), - HvCallCfg_Cur); -} - -static inline u64 HvLpConfig_getSystemPhysicalProcessors(void) -{ - return HvCall0(HvCallCfgGetSystemPhysicalProcessors); -} - -static inline u64 HvLpConfig_getNumProcsInSharedPool(HvLpSharedPoolIndex sPI) -{ - return (u16)HvCall1(HvCallCfgGetNumProcsInSharedPool, sPI); -} - -static inline u64 HvLpConfig_getPhysicalProcessors(void) -{ - return HvCall2(HvCallCfgGetPhysicalProcessors, HvLpConfig_getLpIndex(), - HvCallCfg_Cur); -} - -static inline HvLpSharedPoolIndex HvLpConfig_getSharedPoolIndex(void) -{ - return HvCall1(HvCallCfgGetSharedPoolIndex, HvLpConfig_getLpIndex()); -} - -static inline u64 HvLpConfig_getSharedProcUnits(void) -{ - return HvCall2(HvCallCfgGetSharedProcUnits, HvLpConfig_getLpIndex(), - HvCallCfg_Cur); -} - -static inline u64 HvLpConfig_getMaxSharedProcUnits(void) -{ - return HvCall2(HvCallCfgGetSharedProcUnits, HvLpConfig_getLpIndex(), - HvCallCfg_Max); -} - -static inline u64 HvLpConfig_getMaxPhysicalProcessors(void) -{ - return HvCall2(HvCallCfgGetPhysicalProcessors, HvLpConfig_getLpIndex(), - HvCallCfg_Max); -} - -static inline HvLpVirtualLanIndexMap HvLpConfig_getVirtualLanIndexMapForLp( - HvLpIndex lp) -{ - /* - * This is a new function in V5R1 so calls to this on older - * hypervisors will return -1 - */ - u64 retVal = HvCall1(HvCallCfgGetVirtualLanIndexMap, lp); - if (retVal == -1) - retVal = 0; - return retVal; -} - -static inline HvLpVirtualLanIndexMap HvLpConfig_getVirtualLanIndexMap(void) -{ - return HvLpConfig_getVirtualLanIndexMapForLp( - HvLpConfig_getLpIndex_outline()); -} - -static inline int HvLpConfig_doLpsCommunicateOnVirtualLan(HvLpIndex lp1, - HvLpIndex lp2) -{ - HvLpVirtualLanIndexMap virtualLanIndexMap1 = - HvLpConfig_getVirtualLanIndexMapForLp(lp1); - HvLpVirtualLanIndexMap virtualLanIndexMap2 = - HvLpConfig_getVirtualLanIndexMapForLp(lp2); - return ((virtualLanIndexMap1 & virtualLanIndexMap2) != 0); -} - -static inline HvLpIndex HvLpConfig_getHostingLpIndex(HvLpIndex lp) -{ - return HvCall1(HvCallCfgGetHostingLpIndex, lp); -} - -#endif /* _ASM_POWERPC_ISERIES_HV_LP_CONFIG_H */ diff --git a/arch/powerpc/include/asm/iseries/hv_types.h b/arch/powerpc/include/asm/iseries/hv_types.h deleted file mode 100644 index c3e6d2a1d1c..00000000000 --- a/arch/powerpc/include/asm/iseries/hv_types.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_HV_TYPES_H -#define _ASM_POWERPC_ISERIES_HV_TYPES_H - -/* - * General typedefs for the hypervisor. - */ - -#include - -typedef u8 HvLpIndex; -typedef u16 HvLpInstanceId; -typedef u64 HvLpTOD; -typedef u64 HvLpSystemSerialNum; -typedef u8 HvLpDeviceSerialNum[12]; -typedef u16 HvLpSanHwSet; -typedef u16 HvLpBus; -typedef u16 HvLpBoard; -typedef u16 HvLpCard; -typedef u8 HvLpDeviceType[4]; -typedef u8 HvLpDeviceModel[3]; -typedef u64 HvIoToken; -typedef u8 HvLpName[8]; -typedef u32 HvIoId; -typedef u64 HvRealMemoryIndex; -typedef u32 HvLpIndexMap; /* Must hold HVMAXARCHITECTEDLPS bits!!! */ -typedef u16 HvLpVrmIndex; -typedef u32 HvXmGenerationId; -typedef u8 HvLpBusPool; -typedef u8 HvLpSharedPoolIndex; -typedef u16 HvLpSharedProcUnitsX100; -typedef u8 HvLpVirtualLanIndex; -typedef u16 HvLpVirtualLanIndexMap; /* Must hold HVMAXARCHITECTEDVIRTUALLANS bits!!! */ -typedef u16 HvBusNumber; /* Hypervisor Bus Number */ -typedef u8 HvSubBusNumber; /* Hypervisor SubBus Number */ -typedef u8 HvAgentId; /* Hypervisor DevFn */ - - -#define HVMAXARCHITECTEDLPS 32 -#define HVMAXARCHITECTEDVIRTUALLANS 16 -#define HVMAXARCHITECTEDVIRTUALDISKS 32 -#define HVMAXARCHITECTEDVIRTUALCDROMS 8 -#define HVMAXARCHITECTEDVIRTUALTAPES 8 -#define HVCHUNKSIZE (256 * 1024) -#define HVPAGESIZE (4 * 1024) -#define HVLPMINMEGSPRIMARY 256 -#define HVLPMINMEGSSECONDARY 64 -#define HVCHUNKSPERMEG 4 -#define HVPAGESPERMEG 256 -#define HVPAGESPERCHUNK 64 - -#define HvLpIndexInvalid ((HvLpIndex)0xff) - -/* - * Enums for the sub-components under PLIC - * Used in HvCall and HvPrimaryCall - */ -enum { - HvCallCompId = 0, - HvCallCpuCtlsCompId = 1, - HvCallCfgCompId = 2, - HvCallEventCompId = 3, - HvCallHptCompId = 4, - HvCallPciCompId = 5, - HvCallSlmCompId = 6, - HvCallSmCompId = 7, - HvCallSpdCompId = 8, - HvCallXmCompId = 9, - HvCallRioCompId = 10, - HvCallRsvd3CompId = 11, - HvCallRsvd2CompId = 12, - HvCallRsvd1CompId = 13, - HvCallMaxCompId = 14, - HvPrimaryCallCompId = 0, - HvPrimaryCallCfgCompId = 1, - HvPrimaryCallPciCompId = 2, - HvPrimaryCallSmCompId = 3, - HvPrimaryCallSpdCompId = 4, - HvPrimaryCallXmCompId = 5, - HvPrimaryCallRioCompId = 6, - HvPrimaryCallRsvd7CompId = 7, - HvPrimaryCallRsvd6CompId = 8, - HvPrimaryCallRsvd5CompId = 9, - HvPrimaryCallRsvd4CompId = 10, - HvPrimaryCallRsvd3CompId = 11, - HvPrimaryCallRsvd2CompId = 12, - HvPrimaryCallRsvd1CompId = 13, - HvPrimaryCallMaxCompId = HvCallMaxCompId -}; - -struct HvLpBufferList { - u64 addr; - u64 len; -}; - -#endif /* _ASM_POWERPC_ISERIES_HV_TYPES_H */ diff --git a/arch/powerpc/include/asm/iseries/it_lp_queue.h b/arch/powerpc/include/asm/iseries/it_lp_queue.h deleted file mode 100644 index 42827883882..00000000000 --- a/arch/powerpc/include/asm/iseries/it_lp_queue.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - 
* (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_IT_LP_QUEUE_H -#define _ASM_POWERPC_ISERIES_IT_LP_QUEUE_H - -/* - * This control block defines the simple LP queue structure that is - * shared between the hypervisor (PLIC) and the OS in order to send - * events to an LP. - */ - -#include -#include - -#define IT_LP_MAX_QUEUES 8 - -#define IT_LP_NOT_USED 0 /* Queue will not be used by PLIC */ -#define IT_LP_DEDICATED_IO 1 /* Queue dedicated to IO processor specified */ -#define IT_LP_DEDICATED_LP 2 /* Queue dedicated to LP specified */ -#define IT_LP_SHARED 3 /* Queue shared for both IO and LP */ - -#define IT_LP_EVENT_STACK_SIZE 4096 -#define IT_LP_EVENT_MAX_SIZE 256 -#define IT_LP_EVENT_ALIGN 64 - -struct hvlpevent_queue { -/* - * The hq_current_event is the pointer to the next event stack entry - * that will become valid. The OS must peek at this entry to determine - * if it is valid. PLIC will set the valid indicator as the very last - * store into that entry. - * - * When the OS has completed processing of the event then it will mark - * the event as invalid so that PLIC knows it can store into that event - * location again. - * - * If the event stack fills and there are overflow events, then PLIC - * will set the hq_overflow_pending flag in which case the OS will - * have to fetch the additional LP events once they have drained the - * event stack. - * - * The first 16-bytes are known by both the OS and PLIC. The remainder - * of the cache line is for use by the OS. - */ - u8 hq_overflow_pending; /* 0x00 Overflow events are pending */ - u8 hq_status; /* 0x01 DedicatedIo or DedicatedLp or NotUsed */ - u16 hq_proc_index; /* 0x02 Logical Proc Index for correlation */ - u8 hq_reserved1[12]; /* 0x04 */ - char *hq_current_event; /* 0x10 */ - char *hq_last_event; /* 0x18 */ - char *hq_event_stack; /* 0x20 */ - u8 hq_index; /* 0x28 unique sequential index. */ - u8 hq_reserved2[3]; /* 0x29-2b */ - spinlock_t hq_lock; -}; - -extern struct hvlpevent_queue hvlpevent_queue; - -extern int hvlpevent_is_pending(void); -extern void process_hvlpevents(void); -extern void setup_hvlpevent_queue(void); - -#endif /* _ASM_POWERPC_ISERIES_IT_LP_QUEUE_H */ diff --git a/arch/powerpc/include/asm/iseries/lpar_map.h b/arch/powerpc/include/asm/iseries/lpar_map.h deleted file mode 100644 index 5e9f3e128ee..00000000000 --- a/arch/powerpc/include/asm/iseries/lpar_map.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2001 Mike Corrigan IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_LPAR_MAP_H -#define _ASM_POWERPC_ISERIES_LPAR_MAP_H - -#ifndef __ASSEMBLY__ - -#include - -#endif - -/* - * The iSeries hypervisor will set up mapping for one or more - * ESID/VSID pairs (in SLB/segment registers) and will set up - * mappings of one or more ranges of pages to VAs. - * We will have the hypervisor set up the ESID->VSID mapping - * for the four kernel segments (C-F). With shared processors, - * the hypervisor will clear all segment registers and reload - * these four whenever the processor is switched from one - * partition to another. - */ - -/* The Vsid and Esid identified below will be used by the hypervisor - * to set up a memory mapping for part of the load area before giving - * control to the Linux kernel. The load area is 64 MB, but this must - * not attempt to map the whole load area. The Hashed Page Table may - * need to be located within the load area (if the total partition size - * is 64 MB), but cannot be mapped. Typically, this should specify - * to map half (32 MB) of the load area. - * - * The hypervisor will set up page table entries for the number of - * pages specified. - * - * In 32-bit mode, the hypervisor will load all four of the - * segment registers (identified by the low-order four bits of the - * Esid field. In 64-bit mode, the hypervisor will load one SLB - * entry to map the Esid to the Vsid. -*/ - -#define HvEsidsToMap 2 -#define HvRangesToMap 1 - -/* Hypervisor initially maps 32MB of the load area */ -#define HvPagesToMap 8192 - -#ifndef __ASSEMBLY__ -struct LparMap { - u64 xNumberEsids; // Number of ESID/VSID pairs - u64 xNumberRanges; // Number of VA ranges to map - u64 xSegmentTableOffs; // Page number within load area of seg table - u64 xRsvd[5]; - struct { - u64 xKernelEsid; // Esid used to map kernel load - u64 xKernelVsid; // Vsid used to map kernel load - } xEsids[HvEsidsToMap]; - struct { - u64 xPages; // Number of pages to be mapped - u64 xOffset; // Offset from start of load area - u64 xVPN; // Virtual Page Number - } xRanges[HvRangesToMap]; -}; - -extern const struct LparMap xLparMap; - -#endif /* __ASSEMBLY__ */ - -/* the fixed address where the LparMap exists */ -#define LPARMAP_PHYS 0x7000 - -#endif /* _ASM_POWERPC_ISERIES_LPAR_MAP_H */ diff --git a/arch/powerpc/include/asm/iseries/mf.h b/arch/powerpc/include/asm/iseries/mf.h deleted file mode 100644 index eb851a9c9e5..00000000000 --- a/arch/powerpc/include/asm/iseries/mf.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2001 Troy D. Armstrong IBM Corporation - * Copyright (C) 2004 Stephen Rothwell IBM Corporation - * - * This modules exists as an interface between a Linux secondary partition - * running on an iSeries and the primary partition's Virtual Service - * Processor (VSP) object. The VSP has final authority over powering on/off - * all partitions in the iSeries. It also provides miscellaneous low-level - * machine facility type operations. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef _ASM_POWERPC_ISERIES_MF_H -#define _ASM_POWERPC_ISERIES_MF_H - -#include - -#include -#include - -struct rtc_time; - -typedef void (*MFCompleteHandler)(void *clientToken, int returnCode); - -extern void mf_allocate_lp_events(HvLpIndex targetLp, HvLpEvent_Type type, - unsigned size, unsigned amount, MFCompleteHandler hdlr, - void *userToken); -extern void mf_deallocate_lp_events(HvLpIndex targetLp, HvLpEvent_Type type, - unsigned count, MFCompleteHandler hdlr, void *userToken); - -extern void mf_power_off(void); -extern void mf_reboot(char *cmd); - -extern void mf_display_src(u32 word); -extern void mf_display_progress(u16 value); - -extern void mf_init(void); - -#endif /* _ASM_POWERPC_ISERIES_MF_H */ -- cgit v1.2.3-70-g09d2
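The lib/locks.c hunk earlier in this series reduces __spin_yield() to a single hypervisor path: re-check that the lock is still held by the same owner, then confer the waiting vCPU's cycles to the holder via the H_CONFER hcall. Below is a minimal user-space sketch of that confer-to-holder pattern, not kernel code: demo_spinlock, its "holder CPU + 1" encoding, and confer_to_holder() are invented for illustration, the yield count is a stub (the kernel reads it from the holder's lppaca and only confers when it is odd, meaning the holder vCPU is preempted), and sched_yield() stands in for plpar_hcall_norets(H_CONFER, ...).

/*
 * Illustrative sketch only -- assumptions as noted above, not the
 * actual arch/powerpc/lib/locks.c implementation.
 */
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

struct demo_spinlock {
	atomic_uint slock;	/* 0 = free, otherwise holder CPU + 1 (made-up encoding) */
};

static void confer_to_holder(unsigned int holder_cpu, unsigned int yield_count)
{
	/* Kernel equivalent, as shown in the patch above:
	 *   plpar_hcall_norets(H_CONFER,
	 *                      get_hard_smp_processor_id(holder_cpu),
	 *                      yield_count);
	 * Here we simply yield the CPU as a placeholder.
	 */
	(void)holder_cpu;
	(void)yield_count;
	sched_yield();
}

static void demo_spin_yield(struct demo_spinlock *lock)
{
	unsigned int lock_value = atomic_load(&lock->slock);

	if (lock_value == 0)
		return;				/* lock is free, nothing to confer */

	unsigned int holder_cpu = lock_value - 1;
	unsigned int yield_count = 1;		/* stub: pretend the holder is preempted */

	if (atomic_load(&lock->slock) != lock_value)
		return;				/* something has changed */

	confer_to_holder(holder_cpu, yield_count);
}

int main(void)
{
	struct demo_spinlock lock;

	atomic_init(&lock.slock, 3);		/* pretend CPU 2 holds the lock */
	demo_spin_yield(&lock);
	puts("conferred to holder (sketch)");
	return 0;
}

With the iSeries HvCall2(HvCallBaseYieldProcessor, ...) branch gone, this single PAPR path is all that remains, which is why the spinlock.h hunk can also drop the CONFIG_PPC_ISERIES condition around SHARED_PROCESSOR.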