diff options
37 files changed, 876 insertions, 178 deletions
diff --git a/Documentation/powerpc/dts-bindings/fsl/mpic.txt b/Documentation/powerpc/dts-bindings/fsl/mpic.txt new file mode 100644 index 00000000000..71e39cf3215 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/mpic.txt @@ -0,0 +1,42 @@ +* OpenPIC and its interrupt numbers on Freescale's e500/e600 cores + +The OpenPIC specification does not specify which interrupt source has to +become which interrupt number. This is up to the software implementation +of the interrupt controller. The only requirement is that every +interrupt source has to have an unique interrupt number / vector number. +To accomplish this the current implementation assigns the number zero to +the first source, the number one to the second source and so on until +all interrupt sources have their unique number. +Usually the assigned vector number equals the interrupt number mentioned +in the documentation for a given core / CPU. This is however not true +for the e500 cores (MPC85XX CPUs) where the documentation distinguishes +between internal and external interrupt sources and starts counting at +zero for both of them. + +So what to write for external interrupt source X or internal interrupt +source Y into the device tree? Here is an example: + +The memory map for the interrupt controller in the MPC8544[0] shows, +that the first interrupt source starts at 0x5_0000 (PIC Register Address +Map-Interrupt Source Configuration Registers). This source becomes the +number zero therefore: + External interrupt 0 = interrupt number 0 + External interrupt 1 = interrupt number 1 + External interrupt 2 = interrupt number 2 + ... +Every interrupt number allocates 0x20 bytes register space. So to get +its number it is sufficient to shift the lower 16bits to right by five. +So for the external interrupt 10 we have: + 0x0140 >> 5 = 10 + +After the external sources, the internal sources follow. The in core I2C +controller on the MPC8544 for instance has the internal source number +27. 
To obtain its interrupt number we take the lower 16bits of its memory +address (0x5_0560) and shift it right: + 0x0560 >> 5 = 43 + +Therefore the I2C device node for the MPC8544 CPU has to have the +interrupt number 43 specified in the device tree. + +[0] MPC8544E PowerQUICCTM III, Integrated Host Processor Family Reference Manual + MPC8544ERM Rev. 1 10/2007 diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 51eb6ed5da2..8f345de960c 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -108,12 +108,19 @@ dcr-reg = <0x00c 0x002>; }; + MQ0: mq { + compatible = "ibm,mq-440spe"; + dcr-reg = <0x040 0x020>; + }; + plb { compatible = "ibm,plb-440spe", "ibm,plb-440gp", "ibm,plb4"; #address-cells = <2>; #size-cells = <1>; /* addr-child addr-parent size */ - ranges = <0x4 0xe0000000 0x4 0xe0000000 0x20000000 + ranges = <0x4 0x00100000 0x4 0x00100000 0x00001000 + 0x4 0x00200000 0x4 0x00200000 0x00000400 + 0x4 0xe0000000 0x4 0xe0000000 0x20000000 0xc 0x00000000 0xc 0x00000000 0x20000000 0xd 0x00000000 0xd 0x00000000 0x80000000 0xd 0x80000000 0xd 0x80000000 0x80000000 @@ -400,6 +407,49 @@ 0x0 0x0 0x0 0x3 &UIC3 0xa 0x4 /* swizzled int C */ 0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>; }; + + I2O: i2o@400100000 { + compatible = "ibm,i2o-440spe"; + reg = <0x00000004 0x00100000 0x100>; + dcr-reg = <0x060 0x020>; + }; + + DMA0: dma0@400100100 { + compatible = "ibm,dma-440spe"; + cell-index = <0>; + reg = <0x00000004 0x00100100 0x100>; + dcr-reg = <0x060 0x020>; + interrupt-parent = <&DMA0>; + interrupts = <0 1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = < + 0 &UIC0 0x14 4 + 1 &UIC1 0x16 4>; + }; + + DMA1: dma1@400100200 { + compatible = "ibm,dma-440spe"; + cell-index = <1>; + reg = <0x00000004 0x00100200 0x100>; + dcr-reg = <0x060 0x020>; + interrupt-parent = <&DMA1>; + interrupts = <0 1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + 
interrupt-map = < + 0 &UIC0 0x16 4 + 1 &UIC1 0x16 4>; + }; + + xor-accel@400200000 { + compatible = "amcc,xor-accelerator"; + reg = <0x00000004 0x00200000 0x400>; + interrupt-parent = <&UIC1>; + interrupts = <0x1f 4>; + }; }; chosen { diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index 32e10f588c1..8a3a4f3ef83 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -204,6 +204,7 @@ interrupt-parent = <&ipic>; tbi-handle = <&tbi0>; phy-handle = < &phy0 >; + fsl,magic-packet; mdio@520 { #address-cells = <1>; @@ -246,6 +247,7 @@ interrupt-parent = <&ipic>; tbi-handle = <&tbi1>; phy-handle = < &phy1 >; + fsl,magic-packet; mdio@520 { #address-cells = <1>; @@ -309,6 +311,22 @@ interrupt-parent = <&ipic>; }; + gtm1: timer@500 { + compatible = "fsl,mpc8315-gtm", "fsl,gtm"; + reg = <0x500 0x100>; + interrupts = <90 8 78 8 84 8 72 8>; + interrupt-parent = <&ipic>; + clock-frequency = <133333333>; + }; + + timer@600 { + compatible = "fsl,mpc8315-gtm", "fsl,gtm"; + reg = <0x600 0x100>; + interrupts = <91 8 79 8 85 8 73 8>; + interrupt-parent = <&ipic>; + clock-frequency = <133333333>; + }; + /* IPIC * interrupts cell = <intr #, sense> * sense values match linux IORESOURCE_IRQ_* defines: @@ -337,6 +355,15 @@ 0x59 0x8>; interrupt-parent = < &ipic >; }; + + pmc: power@b00 { + compatible = "fsl,mpc8315-pmc", "fsl,mpc8313-pmc", + "fsl,mpc8349-pmc"; + reg = <0xb00 0x100 0xa00 0x100>; + interrupts = <80 8>; + interrupt-parent = <&ipic>; + fsl,mpc8313-wakeup-timer = <&gtm1>; + }; }; pci0: pci@e0008500 { diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts index feeeb7f9d60..b53d1df11e2 100644 --- a/arch/powerpc/boot/dts/mpc8349emitx.dts +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -63,6 +63,24 @@ reg = <0x200 0x100>; }; + gpio1: gpio-controller@c00 { + #gpio-cells = <2>; + compatible = "fsl,mpc8349-gpio"; + reg = <0xc00 0x100>; + interrupts = <74 0x8>; + 
interrupt-parent = <&ipic>; + gpio-controller; + }; + + gpio2: gpio-controller@d00 { + #gpio-cells = <2>; + compatible = "fsl,mpc8349-gpio"; + reg = <0xd00 0x100>; + interrupts = <75 0x8>; + interrupt-parent = <&ipic>; + gpio-controller; + }; + i2c@3000 { #address-cells = <1>; #size-cells = <0>; @@ -72,6 +90,12 @@ interrupts = <14 0x8>; interrupt-parent = <&ipic>; dfsrr; + + eeprom: at24@50 { + compatible = "st-micro,24c256"; + reg = <0x50>; + }; + }; i2c@3100 { @@ -91,6 +115,25 @@ interrupt-parent = <&ipic>; }; + pcf1: iexp@38 { + #gpio-cells = <2>; + compatible = "ti,pcf8574a"; + reg = <0x38>; + gpio-controller; + }; + + pcf2: iexp@39 { + #gpio-cells = <2>; + compatible = "ti,pcf8574a"; + reg = <0x39>; + gpio-controller; + }; + + spd: at24@51 { + compatible = "at24,spd"; + reg = <0x51>; + }; + mcu_pio: mcu@a { #gpio-cells = <2>; compatible = "fsl,mc9s08qg8-mpc8349emitx", @@ -275,6 +318,24 @@ reg = <0x700 0x100>; device_type = "ipic"; }; + + gpio-leds { + compatible = "gpio-leds"; + + green { + label = "Green"; + gpios = <&pcf1 0 1>; + linux,default-trigger = "heartbeat"; + }; + + yellow { + label = "Yellow"; + gpios = <&pcf1 1 1>; + /* linux,default-trigger = "heartbeat"; */ + default-state = "on"; + }; + }; + }; pci0: pci@e0008500 { @@ -331,7 +392,26 @@ compatible = "fsl,mpc8349e-localbus", "fsl,pq2pro-localbus"; reg = <0xe0005000 0xd8>; - ranges = <0x3 0x0 0xf0000000 0x210>; + ranges = <0x0 0x0 0xfe000000 0x1000000 /* flash */ + 0x1 0x0 0xf8000000 0x20000 /* VSC 7385 */ + 0x2 0x0 0xf9000000 0x200000 /* exp slot */ + 0x3 0x0 0xf0000000 0x210>; /* CF slot */ + + flash@0,0 { + compatible = "cfi-flash"; + reg = <0x0 0x0 0x800000>; + bank-width = <2>; + device-width = <1>; + }; + + flash@0,800000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x800000 0x800000>; + bank-width = <2>; + device-width = <1>; + }; pata@3,0 { compatible = "fsl,mpc8349emitx-pata", "ata-generic"; diff --git a/arch/powerpc/boot/dts/warp.dts 
b/arch/powerpc/boot/dts/warp.dts index 31605ee4afb..e576ee85c42 100644 --- a/arch/powerpc/boot/dts/warp.dts +++ b/arch/powerpc/boot/dts/warp.dts @@ -146,7 +146,7 @@ fpga@2,4000 { compatible = "pika,fpga-sd"; - reg = <0x00000002 0x00004000 0x00000A00>; + reg = <0x00000002 0x00004000 0x00004000>; }; nor@0,0 { diff --git a/arch/powerpc/boot/ugecon.c b/arch/powerpc/boot/ugecon.c index 50609ea6ddf..8f2a6b31153 100644 --- a/arch/powerpc/boot/ugecon.c +++ b/arch/powerpc/boot/ugecon.c @@ -86,7 +86,7 @@ static void ug_putc(char ch) while (!ug_is_txfifo_ready() && count--) barrier(); - if (count) + if (count >= 0) ug_raw_putc(ch); } diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index fc905924c02..826a65d3f00 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -757,7 +757,7 @@ CONFIG_SUNGEM=y # CONFIG_B44 is not set # CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y -CONFIG_ACENIC=y +CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y # CONFIG_DL2K is not set CONFIG_E1000=y @@ -794,8 +794,8 @@ CONFIG_NETDEV_10000=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set -CONFIG_TR=y -CONFIG_IBMOL=y +# CONFIG_TR is not set +# CONFIG_IBMOL is not set # CONFIG_3C359 is not set # CONFIG_TMS380TR is not set diff --git a/arch/powerpc/configs/iseries_defconfig b/arch/powerpc/configs/iseries_defconfig index f925c555508..76982c51a4c 100644 --- a/arch/powerpc/configs/iseries_defconfig +++ b/arch/powerpc/configs/iseries_defconfig @@ -714,8 +714,8 @@ CONFIG_NETDEV_10000=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set -CONFIG_TR=y -CONFIG_IBMOL=y +# CONFIG_TR is not set +# CONFIG_IBMOL is not set # CONFIG_3C359 is not set # CONFIG_TMS380TR is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 25240182457..7b3804a6e36 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -304,11 +304,11 
@@ CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set @@ -980,7 +980,7 @@ CONFIG_E100=y # CONFIG_SC92031 is not set # CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y -CONFIG_ACENIC=y +CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y # CONFIG_DL2K is not set CONFIG_E1000=y @@ -1023,8 +1023,8 @@ CONFIG_PASEMI_MAC=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set -CONFIG_TR=y -CONFIG_IBMOL=y +# CONFIG_TR is not set +# CONFIG_IBMOL is not set # CONFIG_3C359 is not set # CONFIG_TMS380TR is not set @@ -1863,7 +1863,7 @@ CONFIG_HFSPLUS_FS=m # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -CONFIG_CRAMFS=y +CONFIG_CRAMFS=m # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 18af4603625..8195f1650cb 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -1008,8 +1008,8 @@ CONFIG_IXGB=m # CONFIG_QLGE is not set # CONFIG_SFC is not set # CONFIG_BE2NET is not set -CONFIG_TR=y -CONFIG_IBMOL=y +# CONFIG_TR is not set +# CONFIG_IBMOL is not set # CONFIG_3C359 is not set # CONFIG_TMS380TR is not set CONFIG_WLAN=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index c568329723b..ca9ff9aad74 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -230,11 +230,11 @@ CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # 
CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set @@ -796,7 +796,7 @@ CONFIG_E100=y # CONFIG_NET_POCKET is not set # CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y -CONFIG_ACENIC=y +CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y # CONFIG_DL2K is not set CONFIG_E1000=y @@ -834,8 +834,8 @@ CONFIG_S2IO=m # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set -CONFIG_TR=y -CONFIG_IBMOL=y +# CONFIG_TR is not set +# CONFIG_IBMOL is not set # CONFIG_3C359 is not set # CONFIG_TMS380TR is not set @@ -1494,7 +1494,7 @@ CONFIG_CONFIGFS_FS=m # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -CONFIG_CRAMFS=y +CONFIG_CRAMFS=m # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 64e1fdca233..2c15212e170 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -68,7 +68,7 @@ _EMIT_BUG_ENTRY \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (0), "i" (sizeof(struct bug_entry))); \ - for(;;) ; \ + unreachable(); \ } while (0) #define BUG_ON(x) do { \ diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h index ea04632399d..38762edb5e5 100644 --- a/arch/powerpc/include/asm/gpio.h +++ b/arch/powerpc/include/asm/gpio.h @@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio) return __gpio_cansleep(gpio); } -/* - * Not implemented, yet. 
- */ static inline int gpio_to_irq(unsigned int gpio) { - return -ENOSYS; + return __gpio_to_irq(gpio); } static inline int irq_to_gpio(unsigned int irq) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 3839839f83c..b876e989220 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -642,10 +642,14 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, */ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, unsigned int areg, struct pt_regs *regs, - unsigned int flags, unsigned int length) + unsigned int flags, unsigned int length, + unsigned int elsize) { char *ptr; + unsigned long *lptr; int ret = 0; + int sw = 0; + int i, j; flush_vsx_to_thread(current); @@ -654,19 +658,35 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, else ptr = (char *) &current->thread.vr[reg - 32]; - if (flags & ST) - ret = __copy_to_user(addr, ptr, length); - else { - if (flags & SPLT){ - ret = __copy_from_user(ptr, addr, length); - ptr += length; + lptr = (unsigned long *) ptr; + + if (flags & SW) + sw = elsize-1; + + for (j = 0; j < length; j += elsize) { + for (i = 0; i < elsize; ++i) { + if (flags & ST) + ret |= __put_user(ptr[i^sw], addr + i); + else + ret |= __get_user(ptr[i^sw], addr + i); } - ret |= __copy_from_user(ptr, addr, length); + ptr += elsize; + addr += elsize; } - if (flags & U) - regs->gpr[areg] = regs->dar; - if (ret) + + if (!ret) { + if (flags & U) + regs->gpr[areg] = regs->dar; + + /* Splat load copies the same data to top and bottom 8 bytes */ + if (flags & SPLT) + lptr[1] = lptr[0]; + /* For 8 byte loads, zero the top 8 bytes */ + else if (!(flags & ST) && (8 == length)) + lptr[1] = 0; + } else return -EFAULT; + return 1; } #endif @@ -767,16 +787,25 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_VSX if ((instruction & 0xfc00003e) == 0x7c000018) { - /* Additional register addressing bit (64 VSX vs 32 FPR/GPR */ + unsigned int elsize; + + /* Additional register 
addressing bit (64 VSX vs 32 FPR/GPR) */ reg |= (instruction & 0x1) << 5; /* Simple inline decoder instead of a table */ + /* VSX has only 8 and 16 byte memory accesses */ + nb = 8; if (instruction & 0x200) nb = 16; - else if (instruction & 0x080) - nb = 8; - else - nb = 4; + + /* Vector stores in little-endian mode swap individual + elements, so process them separately */ + elsize = 4; + if (instruction & 0x80) + elsize = 8; + flags = 0; + if (regs->msr & MSR_LE) + flags |= SW; if (instruction & 0x100) flags |= ST; if (instruction & 0x040) @@ -787,7 +816,7 @@ int fix_alignment(struct pt_regs *regs) nb = 8; } PPC_WARN_ALIGNMENT(vsx, regs); - return emulate_vsx(addr, reg, areg, regs, flags, nb); + return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); } #endif /* A size of 0 indicates an instruction we don't support, with diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 50f867d657d..3ecdcec0a39 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -340,7 +340,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, else def->tlbiel = 0; - DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, " + DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, " "tlbiel=%d, penc=%d\n", idx, shift, def->sllp, def->avpnm, def->tlbiel, def->penc); @@ -663,7 +663,7 @@ static void __init htab_initialize(void) base = (unsigned long)__va(lmb.memory.region[i].base); size = lmb.memory.region[i].size; - DBG("creating mapping for region: %lx..%lx (prot: %x)\n", + DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); #ifdef CONFIG_U3_DART @@ -879,7 +879,7 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) */ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { - void *pgdir; + pgd_t *pgdir; unsigned long vsid; struct mm_struct *mm; pte_t *ptep; @@ -1025,7 +1025,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) else 
#endif /* CONFIG_PPC_HAS_HASH_64K */ { - int spp = subpage_protection(pgdir, ea); + int spp = subpage_protection(mm, ea); if (access & spp) rc = -2; else @@ -1115,7 +1115,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize, { unsigned long hash, index, shift, hidx, slot; - DBG_LOW("flush_hash_page(va=%016x)\n", va); + DBG_LOW("flush_hash_page(va=%016lx)\n", va); pte_iterate_hashed_subpages(pte, psize, va, index, shift) { hash = hpt_hash(va, shift, ssize); hidx = __rpte_to_hidx(pte, index); @@ -1123,7 +1123,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize, hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx); + DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); ppc_md.hpte_invalidate(slot, va, psize, ssize, local); } pte_iterate_hashed_end(); } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index be4f34c30a0..1044a634b6d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -353,7 +353,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, read_lock(&tasklist_lock); for_each_process(p) { if (p->mm) - cpu_mask_clear_cpu(cpu, mm_cpumask(p->mm)); + cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); } read_unlock(&tasklist_lock); break; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 177e4038b43..573b3bd1c45 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -382,7 +382,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot) return 0; if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) return -EINVAL; - set_pte_at(&init_mm, address, kpte, mk_pte(page, prot)); + __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); wmb(); #ifdef CONFIG_PPC_STD_MMU flush_hash_pages(0, address, pmd_val(*kpmd), 1); diff --git 
a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index d306f07b9aa..43805348b81 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -32,6 +32,7 @@ #define PMCCR1_NEXT_STATE 0x0C /* Next state for power management */ #define PMCCR1_NEXT_STATE_SHIFT 2 #define PMCCR1_CURR_STATE 0x03 /* Current state for power management*/ +#define IMMR_SYSCR_OFFSET 0x100 #define IMMR_RCW_OFFSET 0x900 #define RCW_PCI_HOST 0x80000000 @@ -78,6 +79,22 @@ struct mpc83xx_clock { u32 sccr; }; +struct mpc83xx_syscr { + __be32 sgprl; + __be32 sgprh; + __be32 spridr; + __be32 :32; + __be32 spcr; + __be32 sicrl; + __be32 sicrh; +}; + +struct mpc83xx_saved { + u32 sicrl; + u32 sicrh; + u32 sccr; +}; + struct pmc_type { int has_deep_sleep; }; @@ -87,6 +104,8 @@ static int has_deep_sleep, deep_sleeping; static int pmc_irq; static struct mpc83xx_pmc __iomem *pmc_regs; static struct mpc83xx_clock __iomem *clock_regs; +static struct mpc83xx_syscr __iomem *syscr_regs; +static struct mpc83xx_saved saved_regs; static int is_pci_agent, wake_from_pci; static phys_addr_t immrbase; static int pci_pm_state; @@ -137,6 +156,20 @@ static irqreturn_t pmc_irq_handler(int irq, void *dev_id) return ret; } +static void mpc83xx_suspend_restore_regs(void) +{ + out_be32(&syscr_regs->sicrl, saved_regs.sicrl); + out_be32(&syscr_regs->sicrh, saved_regs.sicrh); + out_be32(&clock_regs->sccr, saved_regs.sccr); +} + +static void mpc83xx_suspend_save_regs(void) +{ + saved_regs.sicrl = in_be32(&syscr_regs->sicrl); + saved_regs.sicrh = in_be32(&syscr_regs->sicrh); + saved_regs.sccr = in_be32(&clock_regs->sccr); +} + static int mpc83xx_suspend_enter(suspend_state_t state) { int ret = -EAGAIN; @@ -166,6 +199,8 @@ static int mpc83xx_suspend_enter(suspend_state_t state) */ if (deep_sleeping) { + mpc83xx_suspend_save_regs(); + out_be32(&pmc_regs->mask, PMCER_ALL); out_be32(&pmc_regs->config1, @@ -179,6 +214,8 @@ static int 
mpc83xx_suspend_enter(suspend_state_t state) in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF); out_be32(&pmc_regs->mask, PMCER_PMCI); + + mpc83xx_suspend_restore_regs(); } else { out_be32(&pmc_regs->mask, PMCER_PMCI); @@ -194,7 +231,7 @@ out: return ret; } -static void mpc83xx_suspend_finish(void) +static void mpc83xx_suspend_end(void) { deep_sleeping = 0; } @@ -278,7 +315,7 @@ static struct platform_suspend_ops mpc83xx_suspend_ops = { .valid = mpc83xx_suspend_valid, .begin = mpc83xx_suspend_begin, .enter = mpc83xx_suspend_enter, - .finish = mpc83xx_suspend_finish, + .end = mpc83xx_suspend_end, }; static int pmc_probe(struct of_device *ofdev, @@ -333,12 +370,23 @@ static int pmc_probe(struct of_device *ofdev, goto out_pmc; } + if (has_deep_sleep) { + syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET, + sizeof(*syscr_regs)); + if (!syscr_regs) { + ret = -ENOMEM; + goto out_syscr; + } + } + if (is_pci_agent) mpc83xx_set_agent(); suspend_set_ops(&mpc83xx_suspend_ops); return 0; +out_syscr: + iounmap(clock_regs); out_pmc: iounmap(pmc_regs); out: diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index c5028a2e5a5..21f61b8c445 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -86,7 +86,7 @@ static int mpc8568_fixup_125_clock(struct phy_device *phydev) scr = phy_read(phydev, MV88E1111_SCR); if (scr < 0) - return err; + return scr; err = phy_write(phydev, MV88E1111_SCR, scr | 0x0008); diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index d5963285e3b..c278bd3a8fe 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -102,7 +102,7 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hwirq) { set_irq_chip_data(virq, h->host_data); - get_irq_desc(virq)->status |= IRQ_LEVEL; + irq_to_desc(virq)->status |= 
IRQ_LEVEL; set_irq_chip_and_handler(virq, &flipper_pic, handle_level_irq); return 0; } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index dd20bff3320..a771f91e215 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -95,7 +95,7 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq, irq_hw_number_t hwirq) { set_irq_chip_data(virq, h->host_data); - get_irq_desc(virq)->status |= IRQ_LEVEL; + irq_to_desc(virq)->status |= IRQ_LEVEL; set_irq_chip_and_handler(virq, &hlwd_pic, handle_level_irq); return 0; } @@ -132,9 +132,9 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq, struct irq_host *irq_host = get_irq_data(cascade_virq); unsigned int virq; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->chip->mask(cascade_virq); /* IRQ_LEVEL */ - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); virq = __hlwd_pic_get_irq(irq_host); if (virq != NO_IRQ) @@ -142,11 +142,11 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq, else pr_err("spurious interrupt!\n"); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->chip->ack(cascade_virq); /* IRQ_LEVEL */ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(cascade_virq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } /* diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index edc956cc8b1..20a8ed91962 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -120,7 +120,7 @@ static void ug_putc(char ch) while (!ug_is_txfifo_ready() && count--) barrier(); - if (count) + if (count >= 0) ug_raw_putc(ch); } diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 0d9343df35b..6617915bcb1 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ 
b/arch/powerpc/platforms/iseries/mf.c @@ -855,59 +855,58 @@ static int mf_get_boot_rtc(struct rtc_time *tm) } #ifdef CONFIG_PROC_FS - -static int proc_mf_dump_cmdline(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int mf_cmdline_proc_show(struct seq_file *m, void *v) { - int len; - char *p; + char *page, *p; struct vsp_cmd_data vsp_cmd; int rc; dma_addr_t dma_addr; /* The HV appears to return no more than 256 bytes of command line */ - if (off >= 256) - return 0; - if ((off + count) > 256) - count = 256 - off; + page = kmalloc(256, GFP_KERNEL); + if (!page) + return -ENOMEM; - dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE); - if (dma_addr == DMA_ERROR_CODE) + dma_addr = iseries_hv_map(page, 256, DMA_FROM_DEVICE); + if (dma_addr == DMA_ERROR_CODE) { + kfree(page); return -ENOMEM; - memset(page, 0, off + count); + } + memset(page, 0, 256); memset(&vsp_cmd, 0, sizeof(vsp_cmd)); vsp_cmd.cmd = 33; vsp_cmd.sub_data.kern.token = dma_addr; vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex; - vsp_cmd.sub_data.kern.side = (u64)data; - vsp_cmd.sub_data.kern.length = off + count; + vsp_cmd.sub_data.kern.side = (u64)m->private; + vsp_cmd.sub_data.kern.length = 256; mb(); rc = signal_vsp_instruction(&vsp_cmd); - iseries_hv_unmap(dma_addr, off + count, DMA_FROM_DEVICE); - if (rc) + iseries_hv_unmap(dma_addr, 256, DMA_FROM_DEVICE); + if (rc) { + kfree(page); return rc; - if (vsp_cmd.result_code != 0) + } + if (vsp_cmd.result_code != 0) { + kfree(page); return -ENOMEM; + } p = page; - len = 0; - while (len < (off + count)) { - if ((*p == '\0') || (*p == '\n')) { - if (*p == '\0') - *p = '\n'; - p++; - len++; - *eof = 1; + while (p - page < 256) { + if (*p == '\0' || *p == '\n') { + *p = '\n'; break; } p++; - len++; - } - if (len < off) { - *eof = 1; - len = 0; } - return len; + seq_write(m, page, p - page); + kfree(page); + return 0; +} + +static int mf_cmdline_proc_open(struct inode *inode, struct file *file) +{ + 
return single_open(file, mf_cmdline_proc_show, PDE(inode)->data); } #if 0 @@ -962,10 +961,8 @@ static int proc_mf_dump_vmlinux(char *page, char **start, off_t off, } #endif -static int proc_mf_dump_side(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int mf_side_proc_show(struct seq_file *m, void *v) { - int len; char mf_current_side = ' '; struct vsp_cmd_data vsp_cmd; @@ -989,21 +986,17 @@ static int proc_mf_dump_side(char *page, char **start, off_t off, } } - len = sprintf(page, "%c\n", mf_current_side); + seq_printf(m, "%c\n", mf_current_side); + return 0; +} - if (len <= (off + count)) - *eof = 1; - *start = page + off; - len -= off; - if (len > count) - len = count; - if (len < 0) - len = 0; - return len; +static int mf_side_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, mf_side_proc_show, NULL); } -static int proc_mf_change_side(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t mf_side_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { char side; u64 newSide; @@ -1041,6 +1034,15 @@ static int proc_mf_change_side(struct file *file, const char __user *buffer, return count; } +static const struct file_operations mf_side_proc_fops = { + .owner = THIS_MODULE, + .open = mf_side_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = mf_side_proc_write, +}; + #if 0 static void mf_getSrcHistory(char *buffer, int size) { @@ -1087,8 +1089,7 @@ static void mf_getSrcHistory(char *buffer, int size) } #endif -static int proc_mf_dump_src(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int mf_src_proc_show(struct seq_file *m, void *v) { #if 0 int len; @@ -1109,8 +1110,13 @@ static int proc_mf_dump_src(char *page, char **start, off_t off, #endif } -static int proc_mf_change_src(struct file *file, const char __user *buffer, - unsigned long count, void *data) 
+static int mf_src_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, mf_src_proc_show, NULL); +} + +static ssize_t mf_src_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { char stkbuf[10]; @@ -1135,9 +1141,19 @@ static int proc_mf_change_src(struct file *file, const char __user *buffer, return count; } -static int proc_mf_change_cmdline(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static const struct file_operations mf_src_proc_fops = { + .owner = THIS_MODULE, + .open = mf_src_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = mf_src_proc_write, +}; + +static ssize_t mf_cmdline_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { + void *data = PDE(file->f_path.dentry->d_inode)->data; struct vsp_cmd_data vsp_cmd; dma_addr_t dma_addr; char *page; @@ -1172,6 +1188,15 @@ out: return ret; } +static const struct file_operations mf_cmdline_proc_fops = { + .owner = THIS_MODULE, + .open = mf_cmdline_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = mf_cmdline_proc_write, +}; + static ssize_t proc_mf_change_vmlinux(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -1246,12 +1271,10 @@ static int __init mf_proc_init(void) if (!mf) return 1; - ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf); + ent = proc_create_data("cmdline", S_IRUSR|S_IWUSR, mf, + &mf_cmdline_proc_fops, (void *)(long)i); if (!ent) return 1; - ent->data = (void *)(long)i; - ent->read_proc = proc_mf_dump_cmdline; - ent->write_proc = proc_mf_change_cmdline; if (i == 3) /* no vmlinux entry for 'D' */ continue; @@ -1263,19 +1286,15 @@ static int __init mf_proc_init(void) return 1; } - ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + ent = proc_create("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root, + &mf_side_proc_fops); if 
(!ent) return 1; - ent->data = (void *)0; - ent->read_proc = proc_mf_dump_side; - ent->write_proc = proc_mf_change_side; - ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); + ent = proc_create("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root, + &mf_src_proc_fops); if (!ent) return 1; - ent->data = (void *)0; - ent->read_proc = proc_mf_dump_src; - ent->write_proc = proc_mf_change_src; return 0; } diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index 49ff4dc422b..5aea94f3083 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -116,7 +116,7 @@ static int proc_viopath_show(struct seq_file *m, void *v) u16 vlanMap; dma_addr_t handle; HvLpEvent_Rc hvrc; - DECLARE_COMPLETION(done); + DECLARE_COMPLETION_ONSTACK(done); struct device_node *node; const char *sysid; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 27554c807fd..c667f0f02c3 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -2,6 +2,8 @@ config PPC_PSERIES depends on PPC64 && PPC_BOOK3S bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC + select PCI_MSI + select XICS select PPC_I8259 select PPC_RTAS select PPC_RTAS_DAEMON diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index bcdcf0ccc8d..a277f2e28db 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -38,19 +38,28 @@ #include <asm/mmu.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> +#include <linux/memory.h> #include "plpar_wrappers.h" #define CMM_DRIVER_VERSION "1.0.0" #define CMM_DEFAULT_DELAY 1 +#define CMM_HOTPLUG_DELAY 5 #define CMM_DEBUG 0 #define CMM_DISABLE 0 #define CMM_OOM_KB 1024 #define CMM_MIN_MEM_MB 256 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) +/* + * The priority level tries to ensure that 
this notifier is called as + * late as possible to reduce thrashing in the shared memory pool. + */ +#define CMM_MEM_HOTPLUG_PRI 1 +#define CMM_MEM_ISOLATE_PRI 15 static unsigned int delay = CMM_DEFAULT_DELAY; +static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; static unsigned int oom_kb = CMM_OOM_KB; static unsigned int cmm_debug = CMM_DEBUG; static unsigned int cmm_disabled = CMM_DISABLE; @@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION); module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); +module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove " + "before loaning resumes. " + "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " "[Default=" __stringify(CMM_OOM_KB) "]"); @@ -92,6 +105,9 @@ static unsigned long oom_freed_pages; static struct cmm_page_array *cmm_page_list; static DEFINE_SPINLOCK(cmm_lock); +static DEFINE_MUTEX(hotplug_mutex); +static int hotplug_occurred; /* protected by the hotplug mutex */ + static struct task_struct *cmm_thread_ptr; /** @@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr) cmm_dbg("Begin request for %ld pages\n", nr); while (nr) { + /* Exit if a hotplug operation is in progress or occurred */ + if (mutex_trylock(&hotplug_mutex)) { + if (hotplug_occurred) { + mutex_unlock(&hotplug_mutex); + break; + } + mutex_unlock(&hotplug_mutex); + } else { + break; + } + addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC); if (!addr) @@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr) if (!pa || pa->index >= CMM_NR_PAGES) { /* Need a new page for the page list. 
*/ spin_unlock(&cmm_lock); - npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC); + npa = (struct cmm_page_array *)__get_free_page( + GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); if (!npa) { pr_info("%s: Can not allocate new page list\n", __func__); free_page(addr); @@ -282,9 +310,28 @@ static int cmm_thread(void *dummy) while (1) { timeleft = msleep_interruptible(delay * 1000); - if (kthread_should_stop() || timeleft) { - loaned_pages_target = loaned_pages; + if (kthread_should_stop() || timeleft) break; + + if (mutex_trylock(&hotplug_mutex)) { + if (hotplug_occurred) { + hotplug_occurred = 0; + mutex_unlock(&hotplug_mutex); + cmm_dbg("Hotplug operation has occurred, " + "loaning activity suspended " + "for %d seconds.\n", + hotplug_delay); + timeleft = msleep_interruptible(hotplug_delay * + 1000); + if (kthread_should_stop() || timeleft) + break; + continue; + } + mutex_unlock(&hotplug_mutex); + } else { + cmm_dbg("Hotplug operation in progress, activity " + "suspended\n"); + continue; } cmm_get_mpp(); @@ -414,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = { }; /** + * cmm_count_pages - Count the number of pages loaned in a particular range. 
+ * + * @arg: memory_isolate_notify structure with address range and count + * + * Return value: + * 0 on success + **/ +static unsigned long cmm_count_pages(void *arg) +{ + struct memory_isolate_notify *marg = arg; + struct cmm_page_array *pa; + unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn); + unsigned long end = start + (marg->nr_pages << PAGE_SHIFT); + unsigned long idx; + + spin_lock(&cmm_lock); + pa = cmm_page_list; + while (pa) { + if ((unsigned long)pa >= start && (unsigned long)pa < end) + marg->pages_found++; + for (idx = 0; idx < pa->index; idx++) + if (pa->page[idx] >= start && pa->page[idx] < end) + marg->pages_found++; + pa = pa->next; + } + spin_unlock(&cmm_lock); + return 0; +} + +/** + * cmm_memory_isolate_cb - Handle memory isolation notifier calls + * @self: notifier block struct + * @action: action to take + * @arg: struct memory_isolate_notify data for handler + * + * Return value: + * NOTIFY_OK or notifier error based on subfunction return value + **/ +static int cmm_memory_isolate_cb(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + if (action == MEM_ISOLATE_COUNT) + ret = cmm_count_pages(arg); + + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; + + return ret; +} + +static struct notifier_block cmm_mem_isolate_nb = { + .notifier_call = cmm_memory_isolate_cb, + .priority = CMM_MEM_ISOLATE_PRI +}; + +/** + * cmm_mem_going_offline - Unloan pages where memory is to be removed + * @arg: memory_notify structure with page range to be offlined + * + * Return value: + * 0 on success + **/ +static int cmm_mem_going_offline(void *arg) +{ + struct memory_notify *marg = arg; + unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn); + unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT); + struct cmm_page_array *pa_curr, *pa_last, *npa; + unsigned long idx; + unsigned long freed = 0; + + cmm_dbg("Memory going offline, searching 0x%lx (%ld 
pages).\n", + start_page, marg->nr_pages); + spin_lock(&cmm_lock); + + /* Search the page list for pages in the range to be offlined */ + pa_last = pa_curr = cmm_page_list; + while (pa_curr) { + for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) { + if ((pa_curr->page[idx] < start_page) || + (pa_curr->page[idx] >= end_page)) + continue; + + plpar_page_set_active(__pa(pa_curr->page[idx])); + free_page(pa_curr->page[idx]); + freed++; + loaned_pages--; + totalram_pages++; + pa_curr->page[idx] = pa_last->page[--pa_last->index]; + if (pa_last->index == 0) { + if (pa_curr == pa_last) + pa_curr = pa_last->next; + pa_last = pa_last->next; + free_page((unsigned long)cmm_page_list); + cmm_page_list = pa_last; + continue; + } + } + pa_curr = pa_curr->next; + } + + /* Search for page list structures in the range to be offlined */ + pa_last = NULL; + pa_curr = cmm_page_list; + while (pa_curr) { + if (((unsigned long)pa_curr >= start_page) && + ((unsigned long)pa_curr < end_page)) { + npa = (struct cmm_page_array *)__get_free_page( + GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!npa) { + spin_unlock(&cmm_lock); + cmm_dbg("Failed to allocate memory for list " + "management. 
Memory hotplug " + "failed.\n"); + return ENOMEM; + } + memcpy(npa, pa_curr, PAGE_SIZE); + if (pa_curr == cmm_page_list) + cmm_page_list = npa; + if (pa_last) + pa_last->next = npa; + free_page((unsigned long) pa_curr); + freed++; + pa_curr = npa; + } + + pa_last = pa_curr; + pa_curr = pa_curr->next; + } + + spin_unlock(&cmm_lock); + cmm_dbg("Released %ld pages in the search range.\n", freed); + + return 0; +} + +/** + * cmm_memory_cb - Handle memory hotplug notifier calls + * @self: notifier block struct + * @action: action to take + * @arg: struct memory_notify data for handler + * + * Return value: + * NOTIFY_OK or notifier error based on subfunction return value + * + **/ +static int cmm_memory_cb(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + switch (action) { + case MEM_GOING_OFFLINE: + mutex_lock(&hotplug_mutex); + hotplug_occurred = 1; + ret = cmm_mem_going_offline(arg); + break; + case MEM_OFFLINE: + case MEM_CANCEL_OFFLINE: + mutex_unlock(&hotplug_mutex); + cmm_dbg("Memory offline operation complete.\n"); + break; + case MEM_GOING_ONLINE: + case MEM_ONLINE: + case MEM_CANCEL_ONLINE: + break; + } + + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; + + return ret; +} + +static struct notifier_block cmm_mem_nb = { + .notifier_call = cmm_memory_cb, + .priority = CMM_MEM_HOTPLUG_PRI +}; + +/** * cmm_init - Module initialization * * Return value: @@ -435,18 +669,24 @@ static int cmm_init(void) if ((rc = cmm_sysfs_register(&cmm_sysdev))) goto out_reboot_notifier; + if (register_memory_notifier(&cmm_mem_nb) || + register_memory_isolate_notifier(&cmm_mem_isolate_nb)) + goto out_unregister_notifier; + if (cmm_disabled) return rc; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); if (IS_ERR(cmm_thread_ptr)) { rc = PTR_ERR(cmm_thread_ptr); - goto out_unregister_sysfs; + goto out_unregister_notifier; } return rc; -out_unregister_sysfs: +out_unregister_notifier: + 
unregister_memory_notifier(&cmm_mem_nb); + unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); cmm_unregister_sysfs(&cmm_sysdev); out_reboot_notifier: unregister_reboot_notifier(&cmm_reboot_nb); @@ -467,6 +707,8 @@ static void cmm_exit(void) kthread_stop(cmm_thread_ptr); unregister_oom_notifier(&cmm_oom_nb); unregister_reboot_notifier(&cmm_reboot_nb); + unregister_memory_notifier(&cmm_mem_nb); + unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); cmm_free_pages(loaned_pages); cmm_unregister_sysfs(&cmm_sysdev); } diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 12df9e8812a..67b7a10f9fc 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -346,12 +346,14 @@ int dlpar_release_drc(u32 drc_index) static DEFINE_MUTEX(pseries_cpu_hotplug_mutex); -void cpu_hotplug_driver_lock() +void cpu_hotplug_driver_lock(void) +__acquires(pseries_cpu_hotplug_mutex) { mutex_lock(&pseries_cpu_hotplug_mutex); } -void cpu_hotplug_driver_unlock() +void cpu_hotplug_driver_unlock(void) +__releases(pseries_cpu_hotplug_mutex) { mutex_unlock(&pseries_cpu_hotplug_mutex); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 8868c012268..b4886635972 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -144,8 +144,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr) hcpuid = get_hard_smp_processor_id(nr); rc = plpar_hcall_norets(H_PROD, hcpuid); if (rc != H_SUCCESS) - panic("Error: Prod to wake up processor %d Ret= %ld\n", - nr, rc); + printk(KERN_ERR "Error: Prod to wake up processor %d\ + Ret= %ld\n", nr, rc); } } diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 971483f0dfa..1709ac5aac7 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -143,13 +143,23 @@ static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type) struct 
irq_desc *desc = irq_to_desc(virq); unsigned int vold, vnew, edibit; - if (flow_type == IRQ_TYPE_NONE) - flow_type = IRQ_TYPE_LEVEL_LOW; - - if (flow_type & IRQ_TYPE_EDGE_RISING) { - printk(KERN_ERR "CPM2 PIC: sense type 0x%x not supported\n", - flow_type); - return -EINVAL; + /* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or + * IRQ_TYPE_EDGE_BOTH (default). All others are IRQ_TYPE_EDGE_FALLING + * or IRQ_TYPE_LEVEL_LOW (default) + */ + if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) { + if (flow_type == IRQ_TYPE_NONE) + flow_type = IRQ_TYPE_EDGE_BOTH; + + if (flow_type != IRQ_TYPE_EDGE_BOTH && + flow_type != IRQ_TYPE_EDGE_FALLING) + goto err_sense; + } else { + if (flow_type == IRQ_TYPE_NONE) + flow_type = IRQ_TYPE_LEVEL_LOW; + + if (flow_type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH)) + goto err_sense; } desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL); @@ -181,6 +191,10 @@ static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type) if (vold != vnew) out_be32(&cpm2_intctl->ic_siexr, vnew); return 0; + +err_sense: + pr_err("CPM2 PIC: sense type 0x%x not supported\n", flow_type); + return -EINVAL; } static struct irq_chip cpm2_pic = { diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4e3a3e345ab..e1a028c1f18 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -464,8 +464,7 @@ static void __iomem *mpc83xx_pcie_remap_cfg(struct pci_bus *bus, { struct pci_controller *hose = pci_bus_to_host(bus); struct mpc83xx_pcie_priv *pcie = hose->dn->data; - u8 bus_no = bus->number - hose->first_busno; - u32 dev_base = bus_no << 24 | devfn << 16; + u32 dev_base = bus->number << 24 | devfn << 16; int ret; ret = mpc83xx_pcie_exclude_device(bus, devfn); @@ -515,12 +514,17 @@ static int mpc83xx_pcie_read_config(struct pci_bus *bus, unsigned int devfn, static int mpc83xx_pcie_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val) { + struct pci_controller *hose = 
pci_bus_to_host(bus); void __iomem *cfg_addr; cfg_addr = mpc83xx_pcie_remap_cfg(bus, devfn, offset); if (!cfg_addr) return PCIBIOS_DEVICE_NOT_FOUND; + /* PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS */ + if (offset == PCI_PRIMARY_BUS && bus->number == hose->first_busno) + val &= 0xffffff00; + switch (len) { case 1: out_8(cfg_addr, val); diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c index 103eace3619..ee1c0e1cf4a 100644 --- a/arch/powerpc/sysdev/mpc8xxx_gpio.c +++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c @@ -54,6 +54,22 @@ static void mpc8xxx_gpio_save_regs(struct of_mm_gpio_chip *mm) mpc8xxx_gc->data = in_be32(mm->regs + GPIO_DAT); } +/* Workaround GPIO 1 errata on MPC8572/MPC8536. The status of GPIOs + * defined as output cannot be determined by reading GPDAT register, + * so we use shadow data register instead. The status of input pins + * is determined by reading GPDAT register. + */ +static int mpc8572_gpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + u32 val; + struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc); + struct mpc8xxx_gpio_chip *mpc8xxx_gc = to_mpc8xxx_gpio_chip(mm); + + val = in_be32(mm->regs + GPIO_DAT) & ~in_be32(mm->regs + GPIO_DIR); + + return (val | mpc8xxx_gc->data) & mpc8xxx_gpio2mask(gpio); +} + static int mpc8xxx_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc); @@ -136,7 +152,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np) gc->ngpio = MPC8XXX_GPIO_PINS; gc->direction_input = mpc8xxx_gpio_dir_in; gc->direction_output = mpc8xxx_gpio_dir_out; - gc->get = mpc8xxx_gpio_get; + if (of_device_is_compatible(np, "fsl,mpc8572-gpio")) + gc->get = mpc8572_gpio_get; + else + gc->get = mpc8xxx_gpio_get; gc->set = mpc8xxx_gpio_set; ret = of_mm_gpiochip_add(np, mm_gc); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index aa9d06e5925..470dc6c11d5 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ 
-567,13 +567,11 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #endif /* CONFIG_MPIC_U3_HT_IRQS */ #ifdef CONFIG_SMP -static int irq_choose_cpu(unsigned int virt_irq) +static int irq_choose_cpu(const cpumask_t *mask) { - cpumask_t mask; int cpuid; - cpumask_copy(&mask, irq_to_desc(virt_irq)->affinity); - if (cpus_equal(mask, CPU_MASK_ALL)) { + if (cpumask_equal(mask, cpu_all_mask)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); unsigned long flags; @@ -594,20 +592,15 @@ static int irq_choose_cpu(unsigned int virt_irq) spin_unlock_irqrestore(&irq_rover_lock, flags); } else { - cpumask_t tmp; - - cpus_and(tmp, cpu_online_map, mask); - - if (cpus_empty(tmp)) + cpuid = cpumask_first_and(mask, cpu_online_mask); + if (cpuid >= nr_cpu_ids) goto do_round_robin; - - cpuid = first_cpu(tmp); } return get_hard_smp_processor_id(cpuid); } #else -static int irq_choose_cpu(unsigned int virt_irq) +static int irq_choose_cpu(const cpumask_t *mask) { return hard_smp_processor_id(); } @@ -816,7 +809,7 @@ int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) unsigned int src = mpic_irq_to_hw(irq); if (mpic->flags & MPIC_SINGLE_DEST_CPU) { - int cpuid = irq_choose_cpu(irq); + int cpuid = irq_choose_cpu(cpumask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid); } else { diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c index 1d44eee80fa..0f67cd79d48 100644 --- a/arch/powerpc/sysdev/mpic_msi.c +++ b/arch/powerpc/sysdev/mpic_msi.c @@ -39,7 +39,12 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) pr_debug("mpic: found U3, guessing msi allocator setup\n"); - /* Reserve source numbers we know are reserved in the HW */ + /* Reserve source numbers we know are reserved in the HW. + * + * This is a bit of a mix of U3 and U4 reserves but that's going + * to work fine, we have plenty enough numbers left so let's just + * mark anything we don't like reserved. 
+ */ for (i = 0; i < 8; i++) msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i); @@ -49,6 +54,10 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) for (i = 100; i < 105; i++) msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i); + for (i = 124; i < mpic->irq_count; i++) + msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i); + + np = NULL; while ((np = of_find_all_nodes(np))) { pr_debug("mpic: mapping hwirqs for %s\n", np->full_name); diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index d3caf23e631..bcbfe79c704 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -64,12 +64,12 @@ static u64 read_ht_magic_addr(struct pci_dev *pdev, unsigned int pos) return addr; } -static u64 find_ht_magic_addr(struct pci_dev *pdev) +static u64 find_ht_magic_addr(struct pci_dev *pdev, unsigned int hwirq) { struct pci_bus *bus; unsigned int pos; - for (bus = pdev->bus; bus; bus = bus->parent) { + for (bus = pdev->bus; bus && bus->self; bus = bus->parent) { pos = pci_find_ht_capability(bus->self, HT_CAPTYPE_MSI_MAPPING); if (pos) return read_ht_magic_addr(bus->self, pos); @@ -78,13 +78,41 @@ static u64 find_ht_magic_addr(struct pci_dev *pdev) return 0; } +static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + + /* U4 PCIe MSIs need to write to the special register in + * the bridge that generates interrupts. There should be + * theoretically a register at 0xf8005000 where you just write + * the MSI number and that triggers the right interrupt, but + * unfortunately, this is busted in HW, the bridge endian swaps + * the value and hits the wrong nibble in the register. 
+ * + * So instead we use another register set which is used normally + * for converting HT interrupts to MPIC interrupts, which decodes + * the interrupt number as part of the low address bits + * + * This will not work if we ever use more than one legacy MSI in + * a block but we never do. For one MSI or multiple MSI-X where + * each interrupt address can be specified separately, it works + * just fine. + */ + if (of_device_is_compatible(hose->dn, "u4-pcie") || + of_device_is_compatible(hose->dn, "U4-pcie")) + return 0xf8004000 | (hwirq << 4); + + return 0; +} + static int u3msi_msi_check_device(struct pci_dev *pdev, int nvec, int type) { if (type == PCI_CAP_ID_MSIX) pr_debug("u3msi: MSI-X untested, trying anyway.\n"); /* If we can't find a magic address then MSI ain't gonna work */ - if (find_ht_magic_addr(pdev) == 0) { + if (find_ht_magic_addr(pdev, 0) == 0 && + find_u4_magic_addr(pdev, 0) == 0) { pr_debug("u3msi: no magic address found for %s\n", pci_name(pdev)); return -ENXIO; @@ -118,10 +146,6 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) u64 addr; int hwirq; - addr = find_ht_magic_addr(pdev); - msg.address_lo = addr & 0xFFFFFFFF; - msg.address_hi = addr >> 32; - list_for_each_entry(entry, &pdev->msi_list, list) { hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, 1); if (hwirq < 0) { @@ -129,6 +153,12 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return hwirq; } + addr = find_ht_magic_addr(pdev, hwirq); + if (addr == 0) + addr = find_u4_magic_addr(pdev, hwirq); + msg.address_lo = addr & 0xFFFFFFFF; + msg.address_hi = addr >> 32; + virq = irq_create_mapping(msi_mpic->irqhost, hwirq); if (virq == NO_IRQ) { pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq); @@ -143,6 +173,8 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) pr_debug("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n", virq, hwirq, (unsigned long)addr); + printk("u3msi: allocated virq 0x%x (hw 0x%x) 
addr 0x%lx\n", + virq, hwirq, (unsigned long)addr); msg.data = hwirq; write_msi_msg(virq, &msg); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c4c8f2e1dd1..d7d77d4a402 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_memory_notifier); +static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); + +int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(register_memory_isolate_notifier); + +void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(unregister_memory_isolate_notifier); + /* * register_memory - Setup a sysfs device for a memory block */ @@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v) return blocking_notifier_call_chain(&memory_chain, val, v); } +int memory_isolate_notify(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&memory_isolate_chain, val, v); +} + /* * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is * OK to have direct references to sparsemem variables in here. diff --git a/include/linux/memory.h b/include/linux/memory.h index 37fa19b34ef..1adfe779eb9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -50,6 +50,19 @@ struct memory_notify { int status_change_nid; }; +/* + * During pageblock isolation, count the number of pages within the + * range [start_pfn, start_pfn + nr_pages) which are owned by code + * in the notifier chain. 
+ */ +#define MEM_ISOLATE_COUNT (1<<0) + +struct memory_isolate_notify { + unsigned long start_pfn; /* Start of range to check */ + unsigned int nr_pages; /* # pages in range to check */ + unsigned int pages_found; /* # pages owned found by callbacks */ +}; + struct notifier_block; struct mem_section; @@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v) { return 0; } +static inline int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ +} +static inline int memory_isolate_notify(unsigned long val, void *v) +{ + return 0; +} #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); +extern int register_memory_isolate_notifier(struct notifier_block *nb); +extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) enum mem_add_context { BOOT, HOTPLUG }; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e869657cb5..d79b9258056 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,6 +48,7 @@ #include <linux/page_cgroup.h> #include <linux/debugobjects.h> #include <linux/kmemleak.h> +#include <linux/memory.h> #include <trace/events/kmem.h> #include <asm/tlbflush.h> @@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, int set_migratetype_isolate(struct page *page) { struct zone *zone; - unsigned long flags; + struct page 
*curr_page; + unsigned long flags, pfn, iter; + unsigned long immobile = 0; + struct memory_isolate_notify arg; + int notifier_ret; int ret = -EBUSY; int zone_idx; zone = page_zone(page); zone_idx = zone_idx(zone); + spin_lock_irqsave(&zone->lock, flags); + if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE || + zone_idx == ZONE_MOVABLE) { + ret = 0; + goto out; + } + + pfn = page_to_pfn(page); + arg.start_pfn = pfn; + arg.nr_pages = pageblock_nr_pages; + arg.pages_found = 0; + /* - * In future, more migrate types will be able to be isolation target. + * It may be possible to isolate a pageblock even if the + * migratetype is not MIGRATE_MOVABLE. The memory isolation + * notifier chain is used by balloon drivers to return the + * number of pages in a range that are held by the balloon + * driver to shrink memory. If all the pages are accounted for + * by balloons, are free, or on the LRU, isolation can continue. + * Later, for example, when memory hotplug notifier runs, these + * pages reported as "can be isolated" should be isolated(freed) + * by the balloon driver through the memory notifier chain. */ - if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && - zone_idx != ZONE_MOVABLE) + notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); + notifier_ret = notifier_to_errno(notifier_ret); + if (notifier_ret || !arg.pages_found) goto out; - set_pageblock_migratetype(page, MIGRATE_ISOLATE); - move_freepages_block(zone, page, MIGRATE_ISOLATE); - ret = 0; + + for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) { + if (!pfn_valid_within(pfn)) + continue; + + curr_page = pfn_to_page(iter); + if (!page_count(curr_page) || PageLRU(curr_page)) + continue; + + immobile++; + } + + if (arg.pages_found == immobile) + ret = 0; + out: + if (!ret) { + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + move_freepages_block(zone, page, MIGRATE_ISOLATE); + } + spin_unlock_irqrestore(&zone->lock, flags); if (!ret) drain_all_pages(); |