summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r-- arch/powerpc/Kconfig 9
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c 3
-rw-r--r-- arch/powerpc/kernel/cputable.c 4
-rw-r--r-- arch/powerpc/kernel/head_64.S 300
-rw-r--r-- arch/powerpc/kernel/lparmap.c 2
-rw-r--r-- arch/powerpc/kernel/process.c 6
-rw-r--r-- arch/powerpc/kernel/prom.c 76
-rw-r--r-- arch/powerpc/kernel/setup_64.c 31
-rw-r--r-- arch/powerpc/lib/copypage_64.S 2
-rw-r--r-- arch/powerpc/lib/copyuser_64.S 4
-rw-r--r-- arch/powerpc/mm/hash_low_64.S 613
-rw-r--r-- arch/powerpc/mm/hash_native_64.c 377
-rw-r--r-- arch/powerpc/mm/hash_utils_64.c 532
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 134
-rw-r--r-- arch/powerpc/mm/init_64.c 18
-rw-r--r-- arch/powerpc/mm/mem.c 56
-rw-r--r-- arch/powerpc/mm/pgtable_64.c 22
-rw-r--r-- arch/powerpc/mm/ppc_mmu_32.c 15
-rw-r--r-- arch/powerpc/mm/slb.c 102
-rw-r--r-- arch/powerpc/mm/slb_low.S 220
-rw-r--r-- arch/powerpc/mm/stab.c 30
-rw-r--r-- arch/powerpc/mm/tlb_64.c 32
-rw-r--r-- arch/powerpc/platforms/iseries/htab.c 65
-rw-r--r-- arch/powerpc/platforms/iseries/hvlog.c 4
-rw-r--r-- arch/powerpc/platforms/iseries/iommu.c 74
-rw-r--r-- arch/powerpc/platforms/iseries/setup.c 13
-rw-r--r-- arch/powerpc/platforms/iseries/vio.c 39
-rw-r--r-- arch/powerpc/platforms/iseries/viopath.c 16
-rw-r--r-- arch/powerpc/platforms/pseries/lpar.c 115
-rw-r--r-- arch/ppc64/Kconfig 13
-rw-r--r-- arch/ppc64/kernel/asm-offsets.c 3
-rw-r--r-- arch/ppc64/kernel/head.S 300
-rw-r--r-- arch/ppc64/kernel/pacaData.c 2
-rw-r--r-- arch/ppc64/kernel/prom.c 94
34 files changed, 2374 insertions, 952 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fb..ca7acb0c79f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -603,6 +603,15 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_64K_PAGES
+ bool "64k page size"
+ help
+ This option changes the kernel logical page size to 64k. On machines
+ without processor support for 64k pages, the kernel will simulate
+ them by loading each individual 4k page on demand transparently,
+ while on hardware with such support, it will be used to map
+ normal application pages.
+
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on PPC64 && SMP
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc0..b7575725199 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -125,6 +125,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_64K_PAGES
+ DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
+#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa080..33c63bcf69f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GR */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
@@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987..16ab40daa73 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -195,11 +195,11 @@ exception_marker:
#define EX_R12 24
#define EX_R13 32
#define EX_SRR0 40
-#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
#define EX_DAR 48
-#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
+#define EX_R3 64
+#define EX_LR 72
#define EXCEPTION_PROLOG_PSERIES(area, label) \
mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_DAR
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries)
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
/*
+ * We have some room here, so we use it to hold
+ * the pSeries SLB miss user trampoline code, keeping it reasonably
+ * far away from slb_miss_user_common to avoid problems with rfid
+ *
+ * This is used when the SLB miss handler has to go virtual,
+ * which no longer happens for now but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+#ifdef __DISABLED__
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+/*
* Vectors for the FWNMI option. Share common code.
*/
.globl system_reset_fwnmi
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
.globl data_access_slb_iSeries
data_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
mfspr r3,SPRN_DAR
- b .do_slb_miss
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
.globl instruction_access_slb_iSeries
instruction_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
- ld r3,PACALPPACA+LPPACASRR0(r13)
- b .do_slb_miss
+ ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge .slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
+
+#ifdef __DISABLED__
+slb_miss_user_iseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ EXCEPTION_PROLOG_ISERIES_2
+ b slb_miss_user_common
+#endif
MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+/*
+ * Here is the common user SLB miss handler that is used when going to
+ * virtual mode for SLB misses; it is currently not used
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b .handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contains the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->exslb too (at offset EX_R3)
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+ ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_slb
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+#ifdef CONFIG_PPC_ISERIES
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
@@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted)
b . /* prevent speculative execution */
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(do_slb_miss)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate /* handle it */
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-/*
* Space for CPU0's segment table.
*
* On iSeries, the hypervisor must fill in at least one entry before
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
#endif
/* Initialize the first segment table (or SLB) entry */
ld r3,PACASTABVIRT(r13) /* get addr of segment table */
+BEGIN_FTR_SECTION
bl .stab_initialize
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+ bl .slb_initialize
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf..5a05a797485 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
.xRanges = {
{ .xPages = HvPagesToMap,
.xOffset = 0,
- .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+ .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
},
},
};
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96843211cc5..7f64f0464d4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -554,12 +554,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
unsigned long sp_vsid = get_kernel_vsid(sp);
+ unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
sp_vsid <<= SLB_VSID_SHIFT;
- sp_vsid |= SLB_VSID_KERNEL;
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- sp_vsid |= SLB_VSID_L;
-
+ sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da69550..3675ef4bac9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
-static int __init scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data)
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
@@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-static void* __init get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size)
{
unsigned long p = node;
@@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
unsigned long size = 0;
@@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
-#ifdef CONFIG_PPC_PSERIES
- /* On LPAR, look for the first ibm,pft-size property for the hash table size
- */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
- u32 *pft_size;
- pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (pft_size != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = pft_size[1];
- }
- }
-#endif
-
boot_cpuid = 0;
boot_cpuid_phys = 0;
if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
} else {
/* Check if it's the boot-cpu, set it's hw index now */
- if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
- prop = get_flat_dt_prop(node, "reg", NULL);
+ if (of_get_flat_dt_prop(node,
+ "linux,boot-cpu", NULL) != NULL) {
+ prop = of_get_flat_dt_prop(node, "reg", NULL);
if (prop != NULL)
boot_cpuid_phys = *prop;
}
@@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
- prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
- prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
- prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
#ifdef CONFIG_PPC64
@@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
- if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
- if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
#endif
- lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (lprop)
memory_limit = *lprop;
#ifdef CONFIG_PPC64
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
if (lprop)
tce_alloc_start = *lprop;
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (lprop)
tce_alloc_end = *lprop;
#endif
@@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
- basep = get_flat_dt_prop(node, "linux,rtas-base", NULL);
- entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL);
- prop = get_flat_dt_prop(node, "linux,rtas-size", NULL);
+ basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
- prop = get_flat_dt_prop(node, "#size-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
- prop = get_flat_dt_prop(node, "#address-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
@@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
@@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
+ reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
@@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- scan_flat_dt(early_init_dt_scan_chosen, NULL);
+ of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
- scan_flat_dt(early_init_dt_scan_root, NULL);
- scan_flat_dt(early_init_dt_scan_memory, NULL);
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
#ifdef CONFIG_PPC64
@@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
- /* Retreive hash table size from flattened tree plus other
- * CPU related informations (altivec support, boot CPU ID, ...)
+ /* Retrieve CPU related information from the flat tree
+ * (altivec support, boot CPU ID, ...)
*/
- scan_flat_dt(early_init_dt_scan_cpus, NULL);
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6b52cce872b..b0994050024 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -277,16 +277,21 @@ void __init early_setup(unsigned long dt_ptr)
DBG("Found, Initializing memory management...\n");
/*
- * Initialize stab / SLB management
+ * Initialize the MMU Hash table and create the linear mapping
+ * of memory. Has to be done before stab/slb initialization as
+ * this is currently where the page size encoding is obtained
*/
- if (!firmware_has_feature(FW_FEATURE_ISERIES))
- stab_initialize(lpaca->stab_real);
+ htab_initialize();
/*
- * Initialize the MMU Hash table and create the linear mapping
- * of memory
+ * Initialize stab / SLB management except on iSeries
*/
- htab_initialize();
+ if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_real);
+ }
DBG(" <- early_setup()\n");
}
@@ -552,10 +557,12 @@ static void __init irqstack_early_init(void)
* SLB misses on them.
*/
for_each_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
- hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
+ softirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
}
}
#else
@@ -583,8 +590,8 @@ static void __init emergency_stack_init(void)
limit = min(0x10000000UL, lmb.rmo_size);
for_each_cpu(i)
- paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
- limit)) + PAGE_SIZE;
+ paca[i].emergency_sp =
+ __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bb..40523b14010 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-_GLOBAL(copy_page)
+_GLOBAL(copy_4K_page)
std r31,-8(1)
std r30,-16(1)
std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb..6d69ef39b7d 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
- beq .Lcopy_page
+ beq .Lcopy_page_4K
andi. r6,r6,7
mtcrf 0x01,r5
blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
* above (following the .Ldst_aligned label) but it runs slightly
* slower on POWER3.
*/
-.Lcopy_page:
+.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eee..e0d02c4a261 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
/*
* ppc64 MMU hashtable management routines
*
- * (c) Copyright IBM Corp. 2003
+ * (c) Copyright IBM Corp. 2003, 2005
*
* Maintained by: Benjamin Herrenschmidt
* <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
* described in the kernel's COPYING file.
*/
+#include <linux/config.h>
#include <asm/reg.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -42,14 +43,24 @@
/* Save non-volatile offsets */
#define STK_REG(i) (112 + ((i)-14)*8)
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+/*****************************************************************************
+ * *
+ * 4K SW & 4K HW pages implementation *
+ * *
+ *****************************************************************************/
+
+
/*
- * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local)
+ * __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
*
- * Adds a page to the hash table. This is the non-LPAR version for now
+ * Adds a 4K page to the hash table in a segment of 4K pages only
*/
-_GLOBAL(__hash_page)
+_GLOBAL(__hash_page_4K)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
/* If so, just bail out and refault if needed. Someone else
* is changing this PTE anyway and might hash it.
*/
- bne- bail_ok
+ bne- htab_bail_ok
+
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
/* Convert linux PTE bits into HW equivalents */
andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
@@ -158,19 +170,21 @@ htab_insert_pte:
andc r30,r30,r0
ori r30,r30,_PAGE_HASHPTE
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate primary group hash */
and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* no vflags */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert1)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
/* Now try secondary slot */
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate secondary group hash */
andc r0,r27,r28
rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,HPTE_V_SECONDARY@l /* secondary slot */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert2)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
_GLOBAL(htab_call_hpte_remove)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* Try all again */
b htab_insert_pte
-bail_ok:
+htab_bail_ok:
li r3,0
- b bail
+ b htab_bail
htab_pte_insert_ok:
/* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
ld r6,STK_PARM(r6)(r1)
std r30,0(r6)
li r3, 0
-bail:
+htab_bail:
ld r27,STK_REG(r27)(r1)
ld r28,STK_REG(r28)(r1)
ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
/* Call ppc_md.hpte_updatepp */
mr r5,r29 /* va */
- li r6,0 /* large is 0 */
+ li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARM(r8)(r1) /* get "local" param */
_GLOBAL(htab_call_hpte_updatepp)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
/* Bail out clearing reservation */
stdcx. r31,0,r6
li r3,1
- b bail
+ b htab_bail
+
+htab_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b htab_bail
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+/* __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
+ */
+
+/*
+ * For now, we do NOT implement Admixed pages
+ */
+_GLOBAL(__hash_page_4K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ * r26 is the hidx mask
+ * r25 is the index in combo page
+ */
+ std r25,STK_REG(r25)(r1)
+ std r26,STK_REG(r26)(r1)
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- htab_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- htab_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Load the hidx index */
+ rldicl r25,r3,64-12,60
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
+ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
+ or r29,r3,r29 /* r29 = va */
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE. We look for
+ * the bit at (1 >> (index + 32))
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ li r26,0 /* Default hidx */
+ beq htab_insert_pte
+ ld r6,STK_PARM(r6)(r1)
+ ori r26,r6,0x8000 /* Load the hidx mask */
+ ld r26,0(r26)
+ addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
+ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
+ bne htab_modify_pte
+
+htab_insert_pte:
+ /* physical address in r5: PTE RPN scaled to HW pages, plus the
+ * sub-page index (r25), shifted up by HW_PAGE_SHIFT */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* physical address in r5, recomputed as for the primary slot */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r3 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b htab_insert_pte
+
+htab_bail_ok:
+ li r3,0
+ b htab_bail
+
+htab_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE second half,
+ * clear _PAGE_BUSY and set appropriate HPTE slot bit
+ */
+ ld r6,STK_PARM(r6)(r1)
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ /* HPTE SUB bit */
+ li r0,1
+ subfic r5,r25,27 /* Must match bit position in */
+ sld r0,r0,r5 /* pgtable.h */
+ or r30,r30,r0
+ /* hidx: replace the 4-bit field for sub-page r25 in r26 with r3 */
+ sldi r5,r25,2
+ sld r3,r3,r5
+ li r4,0xf
+ sld r4,r4,r5
+ andc r26,r26,r4
+ or r26,r26,r3
+ ori r5,r6,0x8000
+ std r26,0(r5)
+ lwsync
+ std r30,0(r6)
+ li r3, 0
+htab_bail:
+ ld r25,STK_REG(r25)(r1)
+ ld r26,STK_REG(r26)(r1)
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+htab_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ sldi r5,r25,2
+ srd r3,r26,r5
+
+ /* Secondary group ? If yes, get an inverted hash value */
+ mr r5,r28
+ andi. r0,r3,0x8 /* page secondary ? */
+ beq 1f
+ not r5,r5
+1: andi. r3,r3,0x7 /* extract idx alone */
+
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_4K /* page size */
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* If we failed, typically because the HPTE wasn't really here,
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- htab_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3,0
+ b htab_bail
+
+htab_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b htab_bail
htab_pte_insert_failure:
/* Bail out restoring old PTE */
ld r6,STK_PARM(r6)(r1)
std r31,0(r6)
li r3,-1
- b bail
+ b htab_bail
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 64K HW in a 64K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+_GLOBAL(__hash_page_64K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ */
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- ht64_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- ht64_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY,HASHPTE and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28
+ rldicl r3,r3,0,36
+ or r29,r3,r29
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ bne ht64_modify_pte
+
+ht64_insert_pte:
+ /* Clear hpte bits in new pte (we also clear BUSY btw) and
+ * add _PAGE_HASHPTE
+ */
+ lis r0,_PAGE_HPTEFLAGS@h
+ ori r0,r0,_PAGE_HPTEFLAGS@l
+ andc r30,r30,r0
+ ori r30,r30,_PAGE_HASHPTE
+
+ /* Physical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* Physical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r3 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(ht64_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b ht64_insert_pte
+
+ht64_bail_ok:
+ li r3,0
+ b ht64_bail
+
+ht64_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE */
+ rldimi r30,r3,12,63-15
+
+ /* Write out the PTE with a normal write
+ * (adding an eieio may still be a good idea ?)
+ */
+ht64_write_out_pte:
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3, 0
+ht64_bail:
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ht64_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ rlwinm r3,r31,32-12,29,31
+
+ /* Secondary group ? If yes, get an inverted hash value */
+ mr r5,r28
+ andi. r0,r31,_PAGE_F_SECOND
+ beq 1f
+ not r5,r5
+1:
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_64K
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(ht64_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* If we failed, typically because the HPTE wasn't really here,
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- ht64_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ b ht64_write_out_pte
+
+ht64_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b ht64_bail
+
+ht64_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b ht64_bail
+
+
+#endif /* CONFIG_PPC_64K_PAGES */
+/*****************************************************************************
+ * *
+ * Huge pages implementation is in hugetlbpage.c *
+ * *
+ *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c2..d96bcfe4c6f 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG_LOW
+
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
@@ -22,11 +25,84 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
+static inline void __tlbie(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile("tlbie %0,0" : : "r" (va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile("tlbie %0,1" : : "r" (va) : "memory");
+ break;
+ }
+}
+
+static inline void __tlbiel(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ }
+
+}
+
+static inline void tlbie(unsigned long va, int psize, int local)
+{
+ unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
+ int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+
+ if (use_local)
+ use_local = mmu_psize_defs[psize].tlbiel;
+ if (lock_tlbie && !use_local)
+ spin_lock(&native_tlbie_lock);
+ asm volatile("ptesync": : :"memory");
+ if (use_local) {
+ __tlbiel(va, psize);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(va, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+ spin_unlock(&native_tlbie_lock);
+}
+
static inline void native_lock_hpte(hpte_t *hptep)
{
unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
hpte_t *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
int i;
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ " rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+ }
+
for (i = 0; i < HPTES_PER_GROUP; i++) {
if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- va &= ~(1UL << HPTE_V_AVPN_SHIFT);
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ i, hpte_v, hpte_r);
+ }
hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
int slot_offset;
unsigned long hpte_v;
+ DBG_LOW(" remove(group=%lx)\n", hpte_group);
+
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
-static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
{
- unsigned long old;
- unsigned long *p = &addr->r;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,61\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
+
+ want_v = hpte_encode_v(va, psize);
+
+ DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ va, want_v & HPTE_V_AVPN, slot, newpp);
+
+ native_lock_hpte(hptep);
+
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+ native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
+ native_unlock_hpte(hptep);
+ }
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+
+ return ret;
}
-/*
- * Only works on small pages. Yes its ugly to have to check each slot in
- * the group but we only use this during bootup.
- */
-static long native_hpte_find(unsigned long vpn)
+static long native_hpte_find(unsigned long va, int psize)
{
hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
hptep = htab_address + slot;
hpte_v = hptep->v;
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
return -1;
}
-static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
-{
- hpte_t *hptep = htab_address + slot;
- unsigned long hpte_v;
- unsigned long avpn = va >> 23;
- int ret = 0;
-
- if (large)
- avpn &= ~1;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- ret = -1;
- } else {
- set_pp_bit(newpp, hptep);
- native_unlock_hpte(hptep);
- }
-
- /* Ensure it is out of the tlb too */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
-
- return ret;
-}
-
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
- * Does not work on large pages.
*
* No need to lock here because we should be the only user.
*/
-static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
- unsigned long vsid, va, vpn, flags = 0;
+ unsigned long vsid, va;
long slot;
hpte_t *hptep;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = native_hpte_find(vpn);
+ slot = native_hpte_find(va, psize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
- set_pp_bit(newpp, hptep);
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
- /* Ensure it is out of the tlb too */
- if (lock_tlbie)
- spin_lock_irqsave(&native_tlbie_lock, flags);
- tlbie(va, 0);
- if (lock_tlbie)
- spin_unlock_irqrestore(&native_tlbie_lock, flags);
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
hpte_t *hptep = htab_address + slot;
unsigned long hpte_v;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long flags;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (large)
- avpn &= ~1;
local_irq_save(flags);
- native_lock_hpte(hptep);
+ DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- } else {
+ else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
- }
- /* Invalidate the tlb */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
+ /* Invalidate the TLB */
+ tlbie(va, psize, local);
+
local_irq_restore(flags);
}
/*
+ * XXX This needs fixing based on page size. It's only used by
+ * native_hpte_clear() for now which needs fixing too so they
+ * make a good pair...
+ */
+static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+{
+ unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
+ unsigned long va;
+
+ va = avpn << 23;
+
+ if (! (hpte_v & HPTE_V_LARGE)) {
+ unsigned long vpi, pteg;
+
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+ va |= vpi << PAGE_SHIFT;
+ }
+
+ return va;
+}
+
+/*
* clear all mappings on kexec. All cpus are in real mode (or they will
* be when they isi), and we are the only one left. We rely on our kernel
* mapping being 0xC0's and the hardware ignoring those two real bits.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* athough there is the control page available...
+ *
+ * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hptep->v = 0;
- tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
}
}
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
local_irq_restore(flags);
}
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ */
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, vpn, hash, secondary, slot, flags, avpn;
- int i, j;
+ unsigned long va, hash, index, hidx, shift, slot;
hpte_t *hptep;
unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+ real_pte_t pte;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long large = batch->large;
+ unsigned long psize = batch->psize;
+ int i;
local_irq_save(flags);
- j = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[j];
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, large);
- secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
-
- hptep = htab_address + slot;
-
- avpn = va >> 23;
- if (large)
- avpn &= ~0x1UL;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- } else {
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
- }
-
- j++;
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ hptep = htab_address + slot;
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+ if (!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ hptep->v = 0;
+ } pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
+ if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbiel(batch->vaddr[i]);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbiel(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("ptesync":::"memory");
} else {
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
spin_lock(&native_tlbie_lock);
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbie(batch->vaddr[i], large);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbie(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02..b2f3dbca695 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
*/
#undef DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/spinlock.h>
@@ -59,6 +60,15 @@
#define DBG(fmt...)
#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -77,91 +87,290 @@ extern unsigned long dart_tablebase;
hpte_t *htab_address;
unsigned long htab_hash_mask;
-
unsigned long _SDR1;
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+int mmu_linear_psize = MMU_PAGE_4K;
+int mmu_virtual_psize = MMU_PAGE_4K;
+#ifdef CONFIG_HUGETLB_PAGE
+int mmu_huge_psize = MMU_PAGE_16M;
+unsigned int HPAGE_SHIFT;
+#endif
-#define KB (1024)
-#define MB (1024*KB)
-
-static inline void loop_forever(void)
-{
- volatile unsigned long x = 1;
- for(;x;x|=1)
- ;
-}
+/* These are the default page size definition arrays, to be used when none
+ * is provided by the firmware.
+ */
-static inline void create_pte_mapping(unsigned long start, unsigned long end,
- unsigned long mode, int large)
+/* Pre-POWER4 CPUs (4k pages only)
+ */
+struct mmu_psize_def mmu_psize_defaults_old[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/* POWER4, GPUL, POWER5
+ *
+ * Support for 16Mb large pages
+ */
+struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = 0,
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode, int psize)
{
- unsigned long addr;
- unsigned int step;
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
unsigned long tmp_mode;
- unsigned long vflags;
+ int ret = 0;
- if (large) {
- step = 16*MB;
- vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
- } else {
- step = 4*KB;
- vflags = HPTE_V_BOLTED;
- }
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
- for (addr = start; addr < end; addr += step) {
+ for (vaddr = vstart, paddr = pstart; vaddr < vend;
+ vaddr += step, paddr += step) {
unsigned long vpn, hash, hpteg;
- unsigned long vsid = get_kernel_vsid(addr);
- unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- int ret = -1;
-
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
-
+ unsigned long vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
- if (!in_kernel_text(addr))
- tmp_mode = mode | HW_NO_EXEC;
-
- hash = hpt_hash(vpn, large);
+ if (!in_kernel_text(vaddr))
+ tmp_mode = mode | HPTE_R_N;
+ hash = hpt_hash(va, shift);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ /* The crap below can be cleaned once ppc_md.probe() can
+ * set up the hash callbacks, thus we can just use the
+ * normal insert callback here.
+ */
#ifdef CONFIG_PPC_ISERIES
- if (systemcfg->platform & PLATFORM_ISERIES_LPAR)
- ret = iSeries_hpte_bolt_or_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
+ ret = iSeries_hpte_insert(hpteg, va,
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_PSERIES
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_MULTIPLATFORM
ret = native_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode, HPTE_V_BOLTED,
+ psize);
#endif
+ if (ret < 0)
+ break;
+ }
+ return ret < 0 ? ret : 0;
+}
- if (ret == -1) {
- ppc64_terminate_msg(0x20, "create_pte_mapping");
- loop_forever();
+/*
+ * Flat device-tree scan callback: parse "ibm,segment-page-sizes" from a
+ * "cpu" node into mmu_psize_defs[].
+ *
+ * The property is read as a sequence of 32-bit cells. Each segment entry
+ * is a 3-cell header (page shift, SLB encoding, number of pairs) followed
+ * by that many 2-cell pairs (page shift, HPTE encoding). Only the pair
+ * whose shift equals the segment shift is used.
+ *
+ * node:  flat device-tree node being visited
+ * uname, depth, data: unused here
+ *
+ * Returns 1 once a "cpu" node carrying the property has been parsed (the
+ * caller treats a non-zero scan result as "found"), 0 otherwise.
+ */
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+					  const char *uname, int depth,
+					  void *data)
+{
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	u32 *prop;
+	unsigned long size = 0;
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	prop = (u32 *)of_get_flat_dt_prop(node,
+					  "ibm,segment-page-sizes", &size);
+	if (prop != NULL) {
+		DBG("Page sizes from device-tree:\n");
+		/* Convert the byte length into a count of 32-bit cells */
+		size /= 4;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+		/* size is unsigned: demand a complete 3-cell header so a
+		 * truncated property cannot wrap the counter and send us
+		 * reading past the end of the property
+		 */
+		while(size >= 3) {
+			unsigned int shift = prop[0];
+			unsigned int slbenc = prop[1];
+			unsigned int lpnum = prop[2];
+			unsigned int lpenc = 0;
+			struct mmu_psize_def *def;
+			int idx = -1;
+
+			size -= 3; prop += 3;
+			/* Likewise only consume complete 2-cell pairs */
+			while(size >= 2 && lpnum) {
+				if (prop[0] == shift)
+					lpenc = prop[1];
+				prop += 2; size -= 2;
+				lpnum--;
+			}
+			switch(shift) {
+			case 0xc:
+				idx = MMU_PAGE_4K;
+				break;
+			case 0x10:
+				idx = MMU_PAGE_64K;
+				break;
+			case 0x14:
+				idx = MMU_PAGE_1M;
+				break;
+			case 0x18:
+				idx = MMU_PAGE_16M;
+				cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+				break;
+			case 0x22:
+				idx = MMU_PAGE_16G;
+				break;
+			}
+			/* Shifts we have no slot for are skipped */
+			if (idx < 0)
+				continue;
+			def = &mmu_psize_defs[idx];
+			def->shift = shift;
+			if (shift <= 23)
+				def->avpnm = 0;
+			else
+				def->avpnm = (1 << (shift - 23)) - 1;
+			def->sllp = slbenc;
+			def->penc = lpenc;
+			/* We don't know for sure what's up with tlbiel, so
+			 * for now we only set it for 4K and 64K pages
+			 */
+			if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+				def->tlbiel = 1;
+			else
+				def->tlbiel = 0;
+
+			DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+			    "tlbiel=%d, penc=%d\n",
+			    idx, shift, def->sllp, def->avpnm, def->tlbiel,
+			    def->penc);
 		}
+		return 1;
+	}
+	return 0;
+}
+
+
+static void __init htab_init_page_sizes(void)
+{ /* Fill mmu_psize_defs, then choose mmu_linear_psize, mmu_virtual_psize
+ * (CONFIG_PPC_64K_PAGES builds only) and, under CONFIG_HUGETLB_PAGE,
+ * mmu_huge_psize and HPAGE_SHIFT. Takes no arguments, returns nothing.
+ */
+ int rc;
+
+ /* Default to 4K pages only (start from the mmu_psize_defaults_old table) */
+ memcpy(mmu_psize_defs, mmu_psize_defaults_old,
+ sizeof(mmu_psize_defaults_old));
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found: skip the built-in fallback table below */
+ goto found;
+
+ /*
+ * Not in the device-tree, let's fallback on known size
+ * list for 16M capable GP & GR
+ */
+ if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
+ cpu_has_feature(CPU_FTR_16M_PAGE))
+ memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ sizeof(mmu_psize_defaults_gp));
+ found:
+ /*
+ * Pick a size for the linear mapping. Currently, we only support
+ * 16M, 1M and 4K which is the default. A size counts as available
+ * when its .shift is non-zero; if neither 16M nor 1M is available,
+ * mmu_linear_psize keeps the value it had on entry.
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K if cache inhibited large pages are supported by the
+ * processor
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
+ mmu_virtual_psize = MMU_PAGE_64K;
+#endif
+
+ printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Init large page size. Currently, we pick 16M or 1M depending
+ * on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_huge_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_huge_psize = MMU_PAGE_1M;
+
+ /* Calculate HPAGE_SHIFT and sanity check it: only a shift strictly
+ * between 16 and 28 is accepted, anything else disables huge pages
+ * by leaving HPAGE_SHIFT at 0
+ */
+ if (mmu_psize_defs[mmu_huge_psize].shift > 16 &&
+ mmu_psize_defs[mmu_huge_psize].shift < 28)
+ HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
+ else
+ HPAGE_SHIFT = 0; /* No huge pages dude ! */
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static int __init htab_dt_scan_pftsize(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{ /* Flat device-tree scan callback: store the second cell of a "cpu"
+ * node's "ibm,pft-size" property in ppc64_pft_size. Returns 1 when
+ * the property was found, 0 otherwise. uname, depth and data are
+ * not used.
+ */
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ if (prop != NULL) {
+ /* pft_size[0] is the NUMA CEC cookie
+ * NOTE(review): the property length is not requested (NULL
+ * above), so prop[1] is read without checking that the
+ * property really holds two cells
+ */
+ ppc64_pft_size = prop[1];
+ return 1;
 }
+ return 0;
 }
-static unsigned long get_hashtable_size(void)
+static unsigned long __init htab_get_table_size(void)
{
unsigned long rnd_mem_size, pteg_count;
- /* If hash size wasn't obtained in prom.c, we calculate it now based on
- * the total RAM size
+ /* If hash size isn't already provided by the platform, we try to
+ * retrieve it from the device-tree. If it's not there either, we
+ * calculate it now based on the total RAM size
*/
+ if (ppc64_pft_size == 0)
+ of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
@@ -181,17 +390,21 @@ void __init htab_initialize(void)
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_rw;
- int i, use_largepages = 0;
unsigned long base = 0, size = 0;
+ int i;
+
extern unsigned long tce_alloc_start, tce_alloc_end;
DBG(" -> htab_initialize()\n");
+ /* Initialize page sizes */
+ htab_init_page_sizes();
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
*/
- htab_size_bytes = get_hashtable_size();
+ htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7;
/* For debug, make the HTAB 1/8 as big as it normally would be. */
@@ -211,14 +424,11 @@ void __init htab_initialize(void)
* the absolute address space.
*/
table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ BUG_ON(table == 0);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
- if ( !table ) {
- ppc64_terminate_msg(0x20, "hpt space");
- loop_forever();
- }
htab_address = abs_to_virt(table);
/* htab absolute addr + encoded htabsize */
@@ -234,8 +444,6 @@ void __init htab_initialize(void)
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- use_largepages = 1;
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +454,32 @@ void __init htab_initialize(void)
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and will
- * fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if we
- * only use 2Mb of that space. We will use more of it later for
- * AGP GART. We have to use a full 16Mb large page.
+ * in such a way that it will not cross two lmb regions and
+ * will fit within a single 16Mb page.
+ * The DART space is assumed to be a full 16Mb region even if
+ * we only use 2Mb of that space. We will use more of it later
+ * for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
if (base != dart_tablebase)
- create_pte_mapping(base, dart_tablebase, mode_rw,
- use_largepages);
+ BUG_ON(htab_bolt_mapping(base, dart_tablebase,
+ base, mode_rw,
+ mmu_linear_psize));
if ((base + size) > (dart_tablebase + 16*MB))
- create_pte_mapping(dart_tablebase + 16*MB, base + size,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
+ base + size,
+ dart_tablebase+16*MB,
+ mode_rw,
+ mmu_linear_psize));
continue;
}
#endif /* CONFIG_U3_DART */
- create_pte_mapping(base, base + size, mode_rw, use_largepages);
- }
+ BUG_ON(htab_bolt_mapping(base, base + size, base,
+ mode_rw, mmu_linear_psize));
+ }
/*
* If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +495,9 @@ void __init htab_initialize(void)
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
- create_pte_mapping(tce_alloc_start, tce_alloc_end,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ tce_alloc_start, mode_rw,
+ mmu_linear_psize));
}
DBG(" <- htab_initialize()\n");
@@ -298,9 +512,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
struct page *page;
- if (!pfn_valid(pte_pfn(pte)))
- return pp;
-
page = pte_page(pte);
/* page is dirty */
@@ -309,7 +520,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= HW_NO_EXEC;
+ pp |= HPTE_R_N;
}
return pp;
}
@@ -325,94 +536,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- int ret;
- int user_region = 0;
- int local = 0;
cpumask_t tmp;
+ int rc, user_region = 0, local = 0;
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 1;
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
+ DBG_LOW(" out of pgtable range !\n");
+ return 1;
+ }
+
+ /* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (! mm)
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
return 1;
-
+ }
vsid = get_vsid(mm->context.id, ea);
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
-#if 0
- case KERNEL_REGION_ID:
- /*
- * Should never get here - entire 0xC0... region is bolted.
- * Send the problem up to do_page_fault
- */
-#endif
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
- break;
}
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Get pgdir */
pgdir = mm->pgd;
-
if (pgdir == NULL)
return 1;
+ /* Check CPU locality */
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
- /* Is this a huge page ? */
- if (unlikely(in_hugepage_area(mm->context, ea)))
- ret = hash_huge_page(mm, access, ea, vsid, local);
- else {
- ptep = find_linux_pte(pgdir, ea);
- if (ptep == NULL)
- return 1;
- ret = __hash_page(ea, access, vsid, ptep, trap, local);
+ /* Handle hugepage regions */
+ if (unlikely(in_hugepage_area(mm->context, ea))) {
+ DBG_LOW(" -> huge page !\n");
+ return hash_huge_page(mm, access, ea, vsid, local);
+ }
+
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ return 1;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
}
- return ret;
+ /* Do actual hashing */
+#ifndef CONFIG_PPC_64K_PAGES
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+ return rc;
}
-void flush_hash_page(unsigned long va, pte_t pte, int local)
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
{
- unsigned long vpn, hash, secondary, slot;
- unsigned long huge = pte_huge(pte);
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+ cpumask_t mask;
+ unsigned long flags;
+ int local = 0;
+
+ /* We don't want huge pages prefaulted for now
+ */
+ if (unlikely(in_hugepage_area(mm->context, ea)))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
- if (huge)
- vpn = va >> HPAGE_SHIFT;
+ /* Get PTE, VSID, access mask */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+ ptep = find_linux_pte(pgdir, ea);
+ if (!ptep)
+ return;
+ vsid = get_vsid(mm->context.id, ea);
+
+ /* Hash it in */
+ local_irq_save(flags);
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (cpus_equal(mm->cpu_vm_mask, mask))
+ local = 1;
+#ifndef CONFIG_PPC_64K_PAGES
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, huge);
- secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-
- ppc_md.hpte_invalidate(slot, va, huge, local);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+ local_irq_restore(flags);
+}
+
+/*
+ * Invalidate the hash table entries backing one Linux PTE.
+ *
+ * va:    virtual address of the page
+ * pte:   real PTE; the per-subpage hash index is extracted from it with
+ *        __rpte_to_hidx()
+ * psize: page size index, passed through to ppc_md.hpte_invalidate()
+ * local: passed through to ppc_md.hpte_invalidate()
+ *
+ * For every hashed subpage produced by pte_iterate_hashed_subpages(), the
+ * slot is rebuilt from the hash (inverted when the entry was placed in
+ * the secondary group) plus the index within the group.
+ */
+void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
+{
+	unsigned long hash, index, shift, hidx, slot;
+
+	/* All values printed below are unsigned long: use the l modifier */
+	DBG_LOW("flush_hash_page(va=%016lx)\n", va);
+	pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+		hash = hpt_hash(va, shift);
+		hidx = __rpte_to_hidx(pte, index);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+		ppc_md.hpte_invalidate(slot, va, psize, local);
+	} pte_iterate_hashed_end();
 }
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range) {
+ if (ppc_md.flush_hash_range)
ppc_md.flush_hash_range(number, local);
- } else {
+ else {
int i;
struct ppc64_tlb_batch *batch =
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
}
}
@@ -452,6 +738,18 @@ void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
+#ifdef CONFIG_PPC_64K_PAGES
+ extern unsigned int *ht64_call_hpte_insert1;
+ extern unsigned int *ht64_call_hpte_insert2;
+ extern unsigned int *ht64_call_hpte_remove;
+ extern unsigned int *ht64_call_hpte_updatepp;
+
+ make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
+ make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+#endif /* CONFIG_PPC_64K_PAGES */
+
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed97..0073a04047e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mechanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
-#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- int i;
-
if (pte_present(*ptep)) {
- pte_clear(mm, addr, ptep);
+ /* We open-code pte_clear because we need to pass the right
+ * argument to hpte_update (huge / !huge)
+ */
+ unsigned long old = pte_update(ptep, ~0UL);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
-
- for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- ptep++;
- }
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
- int i;
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
-
- for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
- ptep[i] = __pte(0);
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+ *ptep = __pte(0);
return __pte(old);
}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
+ if (HPAGE_SHIFT == 0)
+ return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
- unsigned long va, vpn;
- pte_t old_pte, new_pte;
- unsigned long rflags, prpn;
+ unsigned long old_pte, new_pte;
+ unsigned long va, rflags, pa;
long slot;
int err = 1;
- spin_lock(&mm->page_table_lock);
-
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
-/* BUG_ON(pte_bad(*ptep)); */
-
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
- old_pte = *ptep;
- new_pte = old_pte;
-
- rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ do {
+ old_pte = pte_val(*ptep);
+ if (old_pte & _PAGE_BUSY)
+ goto out;
+ new_pte = old_pte | _PAGE_BUSY |
+ _PAGE_ACCESSED | _PAGE_HASHPTE;
+ } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
+
+ rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(vpn, 1);
- if (pte_val(old_pte) & _PAGE_SECONDARY)
+ hash = hpt_hash(va, HPAGE_SHIFT);
+ if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, 1);
+ if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
- prpn = pte_pfn(old_pte);
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- /* Update the linux pte with the HPTE slot */
- pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= _PAGE_HASHPTE;
+ /* clear HPTE slot information in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
+ /* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE, rflags);
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- pte_val(new_pte) |= _PAGE_SECONDARY;
+ new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE |
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
- rflags);
+ mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
- /*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
- */
- *ptep = new_pte;
+ new_pte |= (slot << 12) & _PAGE_F_GIX;
}
+ /*
+ * No need to use ldarx/stdcx here: _PAGE_BUSY was set on this
+ * PTE by the cmpxchg loop above (page_table_lock is no longer
+ * taken in this function), and this store is what clears it
+ */
+ *ptep = __pte(new_pte & ~_PAGE_BUSY);
+
err = 0;
out:
- spin_unlock(&mm->page_table_lock);
-
return err;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29..dfe7fa37b41 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
+#ifdef CONFIG_PPC_64K_PAGES
+static const int pgtable_cache_size[2] = {
+ PTE_TABLE_SIZE, PGD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pte_pmd_cache", "pgd_cache",
+};
+#else
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
+#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
@@ -201,19 +210,14 @@ void pgtable_cache_init(void)
{
int i;
- BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e1..7faa46b71f2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
- /* handle i-cache coherency */
- unsigned long pfn = pte_pfn(pte);
-#ifdef CONFIG_PPC32
- pmd_t *pmd;
-#else
- unsigned long vsid;
- void *pgdir;
- pte_t *ptep;
- int local = 0;
- cpumask_t tmp;
- unsigned long flags;
+#ifdef CONFIG_PPC_STD_MMU
+ unsigned long access = 0, trap;
#endif
+ unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
-#ifdef CONFIG_PPC32
- if (Hash == 0)
- return;
- pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
- if (!pmd_none(*pmd))
- add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
-#else
- pgdir = vma->vm_mm->pgd;
- if (pgdir == NULL)
- return;
- ptep = find_linux_pte(pgdir, address);
- if (!ptep)
+ /* We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot
+ *
+ * We also avoid filling the hash if not coming from a fault
+ */
+ if (current->thread.regs == NULL)
return;
-
- vsid = get_vsid(vma->vm_mm->context.id, address);
-
- local_irq_save(flags);
- tmp = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
- local = 1;
-
- __hash_page(address, 0, vsid, ptep, 0x300, local);
- local_irq_restore(flags);
-#endif
-#endif
+ trap = TRAP(current->thread.regs);
+ if (trap == 0x400)
+ access |= _PAGE_EXEC;
+ else if (trap != 0x300)
+ return;
+ hash_preload(vma->vm_mm, address, access, trap);
+#endif /* CONFIG_PPC_STD_MMU */
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a7820613..51b78694097 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
@@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
- unsigned long va, vpn, hash, hpteg;
-
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
+ *
*/
- vsid = get_kernel_vsid(ea);
- va = (vsid << 28) | (ea & 0xFFFFFFF);
- vpn = va >> PAGE_SHIFT;
-
- hash = hpt_hash(vpn, 0);
-
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- /* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
- HPTE_V_BOLTED,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
- == -1) {
- panic("map_io_page: could not insert mapping");
- }
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_virtual_psize))
+ panic("Can't map bolted IO mapping");
}
return 0;
}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e..d137abd241f 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
}
/*
+ * Preload a translation in the hash table
+ */
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+		  unsigned long access, unsigned long trap)
+{
+	pmd_t *pmd;
+
+	/* mm:     address space owning the translation
+	 * ea:     effective address to preload
+	 * access, trap: accepted for interface compatibility with the
+	 *         64-bit hash_preload(); not used on this path
+	 */
+	if (Hash == 0)
+		return;
+	/* Look the address up in the mm we were given; this function has
+	 * no vma or address variable, only mm and ea
+	 */
+	pmd = pmd_offset(pgd_offset(mm, ea), ea);
+	if (!pmd_none(*pmd))
+		add_hash_page(mm->context, ea, pmd_val(*pmd));
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a3..60e852f2f8e 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
-extern void slb_allocate(unsigned long ea);
+extern void slb_allocate_realmode(unsigned long ea);
+extern void slb_allocate_user(unsigned long ea);
+
+static void slb_allocate(unsigned long ea)
+{
+ /* Currently, we do real mode for all SLBs including user, but
+ * that will change if we bring back dynamic VSIDs
+ *
+ * Thin wrapper: ea is forwarded unchanged to
+ * slb_allocate_realmode(); slb_allocate_user() is declared above
+ * but is not called from here.
+ */
+ slb_allocate_realmode(ea);
+}
static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
- unsigned long ksp_flags = SLB_VSID_KERNEL;
+ unsigned long linear_llp, virtual_llp, lflags, vflags;
unsigned long ksp_esid_data;
WARN_ON(!irqs_disabled());
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- ksp_flags |= SLB_VSID_L;
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
- :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
+ :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
"r"(mk_esid_data(VMALLOCBASE, 1)),
- "r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
+ "r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
get_paca()->slb_cache_ptr = 0;
get_paca()->context = mm->context;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
/*
* preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
slb_allocate(unmapped_base);
}
+static inline void patch_slb_encoding(unsigned int *insn_addr,
+				      unsigned int immed)
+{
+	/* Address range covered by the single patched instruction word */
+	unsigned long first = (unsigned long)insn_addr;
+	unsigned long last = 4 + first;
+
+	/* The instruction is assumed to carry a zero immediate, so the
+	 * new value can simply be OR-ed into the existing word
+	 */
+	*insn_addr = *insn_addr | immed;
+
+	/* Flush the icache over the word we just rewrote */
+	flush_icache_range(first, last);
+}
+
void slb_initialize(void)
{
+ unsigned long linear_llp, virtual_llp;
+ static int slb_encoding_inited;
+ extern unsigned int *slb_miss_kernel_load_linear;
+ extern unsigned int *slb_miss_kernel_load_virtual;
+ extern unsigned int *slb_miss_user_load_normal;
+#ifdef CONFIG_HUGETLB_PAGE
+ extern unsigned int *slb_miss_user_load_huge;
+ unsigned long huge_llp;
+
+ huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
+#endif
+
+ /* Prepare our SLB miss handler based on our page size */
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ if (!slb_encoding_inited) {
+ slb_encoding_inited = 1;
+ patch_slb_encoding(slb_miss_kernel_load_linear,
+ SLB_VSID_KERNEL | linear_llp);
+ patch_slb_encoding(slb_miss_kernel_load_virtual,
+ SLB_VSID_KERNEL | virtual_llp);
+ patch_slb_encoding(slb_miss_user_load_normal,
+ SLB_VSID_USER | virtual_llp);
+
+ DBG("SLB: linear LLP = %04x\n", linear_llp);
+ DBG("SLB: virtual LLP = %04x\n", virtual_llp);
+#ifdef CONFIG_HUGETLB_PAGE
+ patch_slb_encoding(slb_miss_user_load_huge,
+ SLB_VSID_USER | huge_llp);
+ DBG("SLB: huge LLP = %04x\n", huge_llp);
+#endif
+ }
+
/* On iSeries the bolted entries have already been set up by
* the hypervisor from the lparMap data in head.S */
#ifndef CONFIG_PPC_ISERIES
- unsigned long flags = SLB_VSID_KERNEL;
+ {
+ unsigned long lflags, vflags;
- /* Invalidate the entire SLB (even slot 0) & all the ERATS */
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- flags |= SLB_VSID_L;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
- asm volatile("isync":::"memory");
- asm volatile("slbmte %0,%0"::"r" (0) : "memory");
+ /* Invalidate the entire SLB (even slot 0) & all the ERATS */
+ asm volatile("isync":::"memory");
+ asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
- create_slbe(KERNELBASE, flags, 0);
- create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
+ create_slbe(KERNELBASE, lflags, 0);
+
+ /* VMALLOC space uses the virtual (base) page size encoding, see vflags */
+ create_slbe(VMALLOCBASE, vflags, 1);
+
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
* elsewhere, we'll call _switch() which will bolt in the new
* one. */
asm volatile("isync":::"memory");
-#endif
+ }
+#endif /* CONFIG_PPC_ISERIES */
get_paca()->stab_rr = SLB_NUM_BOLTED;
}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503b..3e18241b6f3 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
#include <linux/config.h>
#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
-/* void slb_allocate(unsigned long ea);
+/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate)
- /*
- * First find a slot, round robin. Previously we tried to find
- * a free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-#ifdef CONFIG_PPC_ISERIES
- /*
- * On iSeries, the "bolted" stack segment can be cast out on
- * shared processor switch so we need to check for a miss on
- * it and restore it to the right slot.
- */
- ld r9,PACAKSAVE(r13)
- clrrdi r9,r9,28
- clrrdi r11,r3,28
- li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
- cmpld r9,r11
- beq 3f
-#endif /* CONFIG_PPC_ISERIES */
-
- ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-3:
- /* r3 = faulting address, r10 = entry */
+_GLOBAL(slb_allocate_realmode)
+ /* r3 = faulting address */
srdi r9,r3,60 /* get region */
- srdi r3,r3,28 /* get esid */
+ srdi r10,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
- rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
- oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
-
- /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
-
+ /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
@@ -81,43 +48,161 @@ _GLOBAL(slb_allocate)
* top segment. That's ok, the scramble below will translate
* it to VSID 0, which is reserved as a bad VSID - one which
* will never have any pages in it. */
- li r11,SLB_VSID_KERNEL
-BEGIN_FTR_SECTION
- bne cr7,9f
- li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
- b 9f
-0: /* user address: proto-VSID = context<<15 | ESID */
- srdi. r9,r3,USER_ESID_BITS
+ /* Check whether we hit the linear mapping or the vmalloc/ioremap
+ * kernel space
+ */
+ bne cr7,1f
+
+ /* Linear mapping encoding bits, the "li" instruction below will
+ * be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_linear)
+ li r11,0
+ b slb_finish_load
+
+1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
+ * will be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_virtual)
+ li r11,0
+ b slb_finish_load
+
+
+0: /* user address: proto-VSID = context << 15 | ESID. First check
+ * if the address is within the boundaries of the user region
+ */
+ srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
+ /* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
+ b 1f
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
+ srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
srd r9,r9,r11
lhz r11,PACALOWHTLBAREAS(r13)
- srd r11,r11,r3
- or r9,r9,r11
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+ srd r11,r11,r10
+ or. r9,r9,r11
+ beq 1f
+_GLOBAL(slb_miss_user_load_huge)
+ li r11,0
+ b 2f
+1:
#endif /* CONFIG_HUGETLB_PAGE */
- li r11,SLB_VSID_USER
+_GLOBAL(slb_miss_user_load_normal)
+ li r11,0
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-#endif /* CONFIG_HUGETLB_PAGE */
+2:
+ ld r9,PACACONTEXTID(r13)
+ rldimi r10,r9,USER_ESID_BITS,0
+ b slb_finish_load
+
+8: /* invalid EA */
+ li r10,0 /* BAD_VSID */
+ li r11,SLB_VSID_USER /* flags don't much matter */
+ b slb_finish_load
+
+#ifdef __DISABLED__
+
+/* void slb_allocate_user(unsigned long ea);
+ *
+ * Create an SLB entry for the given EA (user or kernel).
+ * r3 = faulting address, r13 = PACA
+ * r9, r10, r11 are clobbered by this function
+ * No other registers are examined or changed.
+ *
+ * It is called with translation enabled in order to be able to walk the
+ * page tables. This is not currently used.
+ */
+_GLOBAL(slb_allocate_user)
+ /* r3 = faulting address */
+ srdi r10,r3,28 /* get esid */
+
+ crset 4*cr7+lt /* set "user" flag for later */
+
+ /* check if we fit in the range covered by the pagetables*/
+ srdi. r9,r3,PGTABLE_EADDR_SIZE
+ crnot 4*cr0+eq,4*cr0+eq
+ beqlr
+ /* now we need to get to the page tables in order to get the page
+ * size encoding from the PMD. In the future, we'll be able to deal
+ * with 1T segments too by getting the encoding from the PGD instead
+ */
+ ld r9,PACAPGDIR(r13)
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,8,25,28
+ ldx r9,r9,r11 /* get pgd_t */
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,3,17,28
+ ldx r9,r9,r11 /* get pmd_t */
+ cmpldi cr0,r9,0
+ beqlr
+
+ /* build vsid flags */
+ andi. r11,r9,SLB_VSID_LLP
+ ori r11,r11,SLB_VSID_USER
+
+ /* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
- rldimi r3,r9,USER_ESID_BITS,0
+ rldimi r10,r9,USER_ESID_BITS,0
+
+ /* fall through slb_finish_load */
+
+#endif /* __DISABLED__ */
-9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
- ASM_VSID_SCRAMBLE(r3,r9)
- rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+/*
+ * Finish loading of an SLB entry and return
+ *
+ * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
+ */
+slb_finish_load:
+ ASM_VSID_SCRAMBLE(r10,r9)
+ rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+
+ /* r3 = EA, r11 = VSID data */
+ /*
+ * Find a slot, round robin. Previously we tried to find a
+ * free slot first but that took too long. Unfortunately we
+ * don't have any LRU information to help us choose a slot.
+ */
+#ifdef CONFIG_PPC_ISERIES
+ /*
+ * On iSeries, the "bolted" stack segment can be cast out on
+ * shared processor switch so we need to check for a miss on
+ * it and restore it to the right slot.
+ */
+ ld r9,PACAKSAVE(r13)
+ clrrdi r9,r9,28
+ clrrdi r3,r3,28
+ li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
+ cmpld r9,r3
+ beq 3f
+#endif /* CONFIG_PPC_ISERIES */
+
+ ld r10,PACASTABRR(r13)
+ addi r10,r10,1
+ /* use a cpu feature mask if we ever change our slb size */
+ cmpldi r10,SLB_NUM_ENTRIES
+
+ blt+ 4f
+ li r10,SLB_NUM_BOLTED
+
+4:
+ std r10,PACASTABRR(r13)
+
+3:
+ rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
+ oris r10,r3,SLB_ESID_V@h /* r10 = r3 | SLB_ESID_V */
+
+ /* r10 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
*/
slbmte r11,r10
- bgelr cr7 /* we're done for kernel addresses */
+ /* we're done for kernel addresses */
+ crclr 4*cr0+eq /* set result to "success" */
+ bgelr cr7
/* Update the slb cache */
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
li r3,SLB_CACHE_ENTRIES+1
2:
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ crclr 4*cr0+eq /* set result to "success" */
blr
-8: /* invalid EA */
- li r3,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf2..fa325dbf98f 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -26,7 +26,6 @@ struct stab_entry {
unsigned long vsid_data;
};
-/* Both the segment table and SLB code uses the following cache */
#define NR_STAB_CACHE_ENTRIES 8
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Never flush the first entry. */
ste += 1;
for (entry = 1;
- entry < (PAGE_SIZE / sizeof(struct stab_entry));
+ entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
@@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
__get_cpu_var(stab_cache_ptr) = 0;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
+
/* Now preload some entries for the new task */
if (test_tsk_thread_flag(tsk, TIF_32BIT))
unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("sync" : : : "memory");
}
-extern void slb_initialize(void);
-
/*
* Allocate segment tables for secondary CPUs. These must all go in
* the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +244,21 @@ void stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
- newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
+ newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+ 1<<SID_SHIFT);
if (! newstab)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
newstab += KERNELBASE;
- memset((void *)newstab, 0, PAGE_SIZE);
+ memset((void *)newstab, 0, HW_PAGE_SIZE);
paca[cpu].stab_addr = newstab;
paca[cpu].stab_real = virt_to_abs(newstab);
- printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
+ printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx "
+ "virtual, 0x%lx absolute\n",
+ cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
}
}
@@ -267,13 +271,9 @@ void stab_initialize(unsigned long stab)
{
unsigned long vsid = get_kernel_vsid(KERNELBASE);
- if (cpu_has_feature(CPU_FTR_SLB)) {
- slb_initialize();
- } else {
- asm volatile("isync; slbia; isync":::"memory");
- make_ste(stab, GET_ESID(KERNELBASE), vsid);
+ asm volatile("isync; slbia; isync":::"memory");
+ make_ste(stab, GET_ESID(KERNELBASE), vsid);
- /* Order update */
- asm volatile("sync":::"memory");
- }
+ /* Order update */
+ asm volatile("sync":::"memory");
}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4..53e31b834ac 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -30,7 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include <linux/highmem.h>
+#include <asm/bug.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*/
void hpte_update(struct mm_struct *mm, unsigned long addr,
- unsigned long pte, int wrprot)
+ pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
unsigned long vsid;
+ unsigned int psize = mmu_virtual_psize;
int i;
i = batch->index;
+ /* We mask the address for the base page size. Huge pages will
+ * have applied their own masking already
+ */
+ addr &= PAGE_MASK;
+
+ /* Get page size (maybe move back to caller) */
+ if (huge) {
+#ifdef CONFIG_HUGETLB_PAGE
+ psize = mmu_huge_psize;
+#else
+ BUG();
+#endif
+ }
+
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
+ *
+ * We also need to ensure only one page size is present in a given
+ * batch
*/
- if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) {
+ if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
batch->mm = mm;
- batch->large = pte_huge(pte);
+ batch->psize = psize;
}
if (addr < KERNELBASE) {
vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
vsid = get_kernel_vsid(addr);
batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
- batch->pte[i] = __pte(pte);
+ batch->pte[i] = __real_pte(__pte(pte), ptep);
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
local = 1;
if (i == 1)
- flush_hash_page(batch->vaddr[0], batch->pte[0], local);
+ flush_hash_page(batch->vaddr[0], batch->pte[0],
+ batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c3374ca..30bdcf3925d 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
}
-static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
- unsigned long arpn;
long slot;
hpte_t lhpte;
int secondary = 0;
+ BUG_ON(psize != MMU_PAGE_4K);
+
/*
* The hypervisor tries both primary and secondary.
* If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
iSeries_hlock(hpte_group);
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
- BUG_ON(lhpte.v & HPTE_V_VALID);
+ slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
+ if (unlikely(lhpte.v & HPTE_V_VALID)) {
+ if (vflags & HPTE_V_BOLTED) {
+ HvCallHpt_setSwBits(slot, 0x10, 0);
+ HvCallHpt_setPp(slot, PP_RWXX);
+ iSeries_hunlock(hpte_group);
+ if (slot < 0)
+ return 0x8 | (slot & 7);
+ else
+ return slot & 7;
+ }
+ BUG();
+ }
if (slot == -1) { /* No available entry found in either group */
iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
slot &= 0x7fffffffffffffff;
}
- arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
- lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+ lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
+ lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
/* Now fill in the actual HPTE */
HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
return (secondary << 3) | (slot & 7);
}
-long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
-{
- long slot;
- hpte_t lhpte;
-
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-
- if (lhpte.v & HPTE_V_VALID) {
- /* Bolt the existing HPTE */
- HvCallHpt_setSwBits(slot, 0x10, 0);
- HvCallHpt_setPp(slot, PP_RWXX);
- return 0;
- }
-
- return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
-}
-
static unsigned long iSeries_hpte_getword0(unsigned long slot)
{
hpte_t hpte;
@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
* bits 61..63 : PP2,PP1,PP0
*/
static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+ unsigned long va, int psize, int local)
{
hpte_t hpte;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
iSeries_hlock(slot);
HvCallHpt_get(&hpte, slot);
- if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
+ want_v = hpte_encode_v(va, MMU_PAGE_4K);
+
+ if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
/*
* Hypervisor expects bits as NPPP, which is
* different from how they are mapped in our PP.
@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
*
* No need to lock here because we should be the only user.
*/
-static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
unsigned long vsid,va,vpn;
long slot;
+ BUG_ON(psize != MMU_PAGE_4K);
+
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
+ vpn = va >> HW_PAGE_SHIFT;
slot = iSeries_hpte_find(vpn);
if (slot == -1)
panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
}
static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
unsigned long hpte_v;
unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec7347968..f476d71194f 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
while (len) {
hv_buf.addr = cur;
- left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
+ left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
if (left_this_page > len)
left_this_page = len;
hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
HvCall2(HvCallBaseWriteLogBuffer,
virt_to_abs(&hv_buf),
left_this_page);
- cur = (cur & PAGE_MASK) + PAGE_SIZE;
+ cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
}
}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b5c5a..bf081b34582 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
u64 rc;
union tce_entry tce;
+ index <<= TCE_PAGE_FACTOR;
+ npages <<= TCE_PAGE_FACTOR;
+
while (npages--) {
tce.te_word = 0;
- tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
+ tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
rc);
index++;
- uaddr += PAGE_SIZE;
+ uaddr += TCE_PAGE_SIZE;
}
}
@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
{
u64 rc;
+ npages <<= TCE_PAGE_FACTOR;
+ index <<= TCE_PAGE_FACTOR;
+
while (npages--) {
rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
if (rc)
@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
}
}
-#ifdef CONFIG_PCI
-/*
- * This function compares the known tables to find an iommu_table
- * that has already been built for hardware TCEs.
- */
-static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
-{
- struct pci_dn *pdn;
-
- list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
- struct iommu_table *it = pdn->iommu_table;
- if ((it != NULL) &&
- (it->it_type == TCE_PCI) &&
- (it->it_offset == tbl->it_offset) &&
- (it->it_index == tbl->it_index) &&
- (it->it_size == tbl->it_size))
- return it;
- }
- return NULL;
-}
-
/*
* Call Hv with the architected data structure to get TCE table info.
* info. Put the returned data into the Linux representation of the
@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
* 2. TCE table per Bus.
* 3. TCE Table per IOA.
*/
-static void iommu_table_getparms(struct pci_dn *pdn,
- struct iommu_table* tbl)
+void iommu_table_getparms_iSeries(unsigned long busno,
+ unsigned char slotno,
+ unsigned char virtbus,
+ struct iommu_table* tbl)
{
struct iommu_table_cb *parms;
@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
memset(parms, 0, sizeof(*parms));
- parms->itc_busno = pdn->busno;
- parms->itc_slotno = pdn->LogicalSlot;
- parms->itc_virtbus = 0;
+ parms->itc_busno = busno;
+ parms->itc_slotno = slotno;
+ parms->itc_virtbus = virtbus;
HvCallXm_getTceTableParms(iseries_hv_addr(parms));
@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
/* itc_size is in pages worth of table, it_size is in # of entries */
- tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
+ tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
+ sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
tbl->it_busno = parms->itc_busno;
- tbl->it_offset = parms->itc_offset;
+ tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
tbl->it_index = parms->itc_index;
tbl->it_blocksize = 1;
- tbl->it_type = TCE_PCI;
+ tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
kfree(parms);
}
+#ifdef CONFIG_PCI
+/*
+ * This function compares the known tables to find an iommu_table
+ * that has already been built for hardware TCEs.
+ */
+static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
+{
+ struct pci_dn *pdn;
+
+ list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
+ struct iommu_table *it = pdn->iommu_table;
+ if ((it != NULL) &&
+ (it->it_type == TCE_PCI) &&
+ (it->it_offset == tbl->it_offset) &&
+ (it->it_index == tbl->it_index) &&
+ (it->it_size == tbl->it_size))
+ return it;
+ }
+ return NULL;
+}
+
+
void iommu_devnode_init_iSeries(struct device_node *dn)
{
struct iommu_table *tbl;
@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
- iommu_table_getparms(pdn, tbl);
+ iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
/* Look for existing tce table */
pdn->iommu_table = iommu_table_find(tbl);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fda712b4216..c5207064977 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -320,11 +320,11 @@ static void __init iSeries_init_early(void)
*/
if (naca.xRamDisk) {
initrd_start = (unsigned long)__va(naca.xRamDisk);
- initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
+ initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
initrd_below_start_ok = 1; // ramdisk in kernel space
ROOT_DEV = Root_RAM0;
- if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
- rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
+ if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
+ rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
} else
#endif /* CONFIG_BLK_DEV_INITRD */
{
@@ -470,13 +470,14 @@ static void __init build_iSeries_Memory_Map(void)
*/
hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
hptSizePages = (u32)HvCallHpt_getHptPages();
- hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
+ hptSizeChunks = hptSizePages >>
+ (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
printk("HPT absolute addr = %016lx, size = %dK\n",
chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
- ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE);
+ ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
/*
* The actual hashed page table is in the hypervisor,
@@ -629,7 +630,7 @@ static void __init iSeries_fixup_klimit(void)
*/
if (naca.xRamDisk)
klimit = KERNELBASE + (u64)naca.xRamDisk +
- (naca.xRamDiskSize * PAGE_SIZE);
+ (naca.xRamDiskSize * HW_PAGE_SIZE);
else {
/*
* No ram disk was included - check and see if there
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a66876c2..384360ee06e 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
static void __init iommu_vio_init(void)
{
- struct iommu_table *t;
- struct iommu_table_cb cb;
- unsigned long cbp;
- unsigned long itc_entries;
+ iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
+ veth_iommu_table.it_size /= 2;
+ vio_iommu_table = veth_iommu_table;
+ vio_iommu_table.it_offset += veth_iommu_table.it_size;
- cb.itc_busno = 255; /* Bus 255 is the virtual bus */
- cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
- cbp = virt_to_abs(&cb);
- HvCallXm_getTceTableParms(cbp);
-
- itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
- veth_iommu_table.it_size = itc_entries / 2;
- veth_iommu_table.it_busno = cb.itc_busno;
- veth_iommu_table.it_offset = cb.itc_offset;
- veth_iommu_table.it_index = cb.itc_index;
- veth_iommu_table.it_type = TCE_VB;
- veth_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&veth_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&veth_iommu_table))
printk("Virtual Bus VETH TCE table failed.\n");
-
- vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
- vio_iommu_table.it_busno = cb.itc_busno;
- vio_iommu_table.it_offset = cb.itc_offset +
- veth_iommu_table.it_size;
- vio_iommu_table.it_index = cb.itc_index;
- vio_iommu_table.it_type = TCE_VB;
- vio_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&vio_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&vio_iommu_table))
printk("Virtual Bus VIO TCE table failed.\n");
}
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfbf746..84267269559 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
* For each kind of event we allocate a buffer that is
* guaranteed not to cross a page boundary
*/
-static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
+static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
+ __attribute__((__aligned__(4096)));
static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
static int event_buffer_initialised;
@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
HvLpEvent_Rc hvrc;
DECLARE_MUTEX_LOCKED(Semaphore);
- buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
if (!buf)
return 0;
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, HW_PAGE_SIZE);
- handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
+ handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
viopath_sourceinst(viopath_hostLp),
viopath_targetinst(viopath_hostLp),
(u64)(unsigned long)&Semaphore, VIOVERSION << 16,
- ((u64)handle) << 32, PAGE_SIZE, 0, 0);
+ ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
if (hvrc != HvLpEvent_Rc_Good)
printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
vlanMap = HvLpConfig_getVirtualLanIndexMap();
- buf[PAGE_SIZE-1] = '\0';
+ buf[HW_PAGE_SIZE-1] = '\0';
seq_printf(m, "%s", buf);
seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
e2a(xItExtVpdPanel.systemSerial[4]),
e2a(xItExtVpdPanel.systemSerial[5]));
- dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
+ DMA_FROM_DEVICE);
kfree(buf);
return 0;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e384a5a9179..ab0c6dd6ec9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#define DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/kernel.h>
@@ -41,10 +41,10 @@
#include "plpar_wrappers.h"
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
#else
-#define DBG(fmt...)
+#define DBG_LOW(fmt...) do { } while(0)
#endif
/* in pSeries_hvCall.S */
@@ -276,8 +276,9 @@ void vpa_init(int cpu)
}
long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn,
- unsigned long vflags, unsigned long rflags)
+ unsigned long va, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize)
{
unsigned long lpar_rc;
unsigned long flags;
@@ -285,11 +286,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
unsigned long dummy0, dummy1;
- hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
-
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ "rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+#if 1
+ {
+ int i;
+ for (i=0;i<8;i++) {
+ unsigned long w0, w1;
+ plpar_pte_read(0, hpte_group, &w0, &w1);
+ BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
+ && (w0 & HPTE_V_VALID));
+ }
+ }
+#endif
/* Now fill in the actual HPTE */
/* Set CEC cookie to 0 */
@@ -299,23 +317,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
- /* XXX why is this here? - Anton */
+ /* Make pHyp happy */
if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
hpte_r &= ~_PAGE_COHERENT;
lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
hpte_r, &slot, &dummy0, &dummy1);
-
- if (unlikely(lpar_rc == H_PTEG_Full))
+ if (unlikely(lpar_rc == H_PTEG_Full)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" full\n");
return -1;
+ }
/*
* Since we try and ioremap PHBs we don't own, the pte insert
* will fail. However we must catch the failure in hash_page
* or we will loop forever, so return -2 in this case.
*/
- if (unlikely(lpar_rc != H_Success))
+ if (unlikely(lpar_rc != H_Success)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" lpar err %d\n", lpar_rc);
return -2;
+ }
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" -> slot: %d\n", slot & 7);
/* Because of iSeries, we have to pass down the secondary
* bucket bit here as well
@@ -340,10 +365,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
(0x1UL << 4), &dummy1, &dummy2);
-
if (lpar_rc == H_Success)
return i;
-
BUG_ON(lpar_rc != H_Not_Found);
slot_offset++;
@@ -371,20 +394,28 @@ static void pSeries_lpar_hptab_clear(void)
* We can probably optimize here and assume the high bits of newpp are
* already zero. For now I am paranoid.
*/
-static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+static long pSeries_lpar_hpte_updatepp(unsigned long slot,
+ unsigned long newpp,
+ unsigned long va,
+ int psize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
- if (large)
- avpn &= ~0x1UL;
+ want_v = hpte_encode_v(va, psize);
- lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
+ DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
+ want_v & HPTE_V_AVPN, slot, flags, psize);
- if (lpar_rc == H_Not_Found)
+ lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
+
+ if (lpar_rc == H_Not_Found) {
+ DBG_LOW("not found !\n");
return -1;
+ }
+
+ DBG_LOW("ok\n");
BUG_ON(lpar_rc != H_Success);
@@ -410,21 +441,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
-static long pSeries_lpar_hpte_find(unsigned long vpn)
+static long pSeries_lpar_hpte_find(unsigned long va, int psize)
{
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hpte_v = pSeries_lpar_hpte_getword0(slot);
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -441,17 +473,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
- unsigned long ea)
+ unsigned long ea,
+ int psize)
{
- unsigned long lpar_rc;
- unsigned long vsid, va, vpn, flags;
- long slot;
+ unsigned long lpar_rc, slot, vsid, va, flags;
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = pSeries_lpar_hpte_find(vpn);
+ slot = pSeries_lpar_hpte_find(va, psize);
BUG_ON(slot == -1);
flags = newpp & 7;
@@ -461,18 +491,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
- if (large)
- avpn &= ~0x1UL;
-
- lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
- &dummy2);
+ DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
+ slot, va, psize, local);
+ want_v = hpte_encode_v(va, psize);
+ lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
+ &dummy1, &dummy2);
if (lpar_rc == H_Not_Found)
return;
@@ -494,7 +524,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index b987164fca4..2130cc31595 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -47,6 +47,10 @@ config ARCH_MAY_HAVE_PC_FDC
bool
default y
+config PPC_STD_MMU
+ bool
+ default y
+
# We optimistically allocate largepages from the VM, so make the limit
# large enough (16MB). This badly named config option is actually
# max order + 1
@@ -294,6 +298,15 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_64K_PAGES
+ bool "64k page size"
+ help
+ This option changes the kernel logical page size to 64k. On machines
+ without processor support for 64k pages, the kernel will simulate
+ them by loading each individual 4k page on demand transparently,
+ while on hardware with such support, it will be used to map
+ normal application pages.
+
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
index 504dee836d2..bce9065da6c 100644
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -93,6 +93,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_64K_PAGES
+ DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
+#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index db1cf397be2..9e8050ea122 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -195,11 +195,11 @@ exception_marker:
#define EX_R12 24
#define EX_R13 32
#define EX_SRR0 40
-#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
#define EX_DAR 48
-#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
+#define EX_R3 64
+#define EX_LR 72
#define EXCEPTION_PROLOG_PSERIES(area, label) \
mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_DAR
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries)
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
/*
+ * We have some room here, so we use it to hold
+ * the pSeries SLB miss user trampoline code, keeping it reasonably
+ * far away from slb_miss_user_common to avoid problems with rfid.
+ *
+ * This is used when the SLB miss handler has to go virtual,
+ * which no longer happens for now but will once we re-implement
+ * dynamic VSIDs for shared page tables.
+ */
+#ifdef __DISABLED__
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+/*
* Vectors for the FWNMI option. Share common code.
*/
.globl system_reset_fwnmi
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
.globl data_access_slb_iSeries
data_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
mfspr r3,SPRN_DAR
- b .do_slb_miss
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
.globl instruction_access_slb_iSeries
instruction_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
- ld r3,PACALPPACA+LPPACASRR0(r13)
- b .do_slb_miss
+ ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge .slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
+
+#ifdef __DISABLED__
+slb_miss_user_iseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ EXCEPTION_PROLOG_ISERIES_2
+ b slb_miss_user_common
+#endif
MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+/*
+ * Here is the common SLB miss user handler, used when going to virtual
+ * mode for SLB misses. It is currently not used.
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b .handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contains the saved SRR1; SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->exslb too (at offset EX_R3)
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+ ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_slb
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+#ifdef CONFIG_PPC_ISERIES
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
@@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted)
b . /* prevent speculative execution */
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(do_slb_miss)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate /* handle it */
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-/*
* Space for CPU0's segment table.
*
* On iSeries, the hypervisor must fill in at least one entry before
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
#endif
/* Initialize the first segment table (or SLB) entry */
ld r3,PACASTABVIRT(r13) /* get addr of segment table */
+BEGIN_FTR_SECTION
bl .stab_initialize
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+ bl .slb_initialize
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
index 5e27e5a6a35..3133c72b28e 100644
--- a/arch/ppc64/kernel/pacaData.c
+++ b/arch/ppc64/kernel/pacaData.c
@@ -23,7 +23,7 @@
static union {
struct systemcfg data;
u8 page[PAGE_SIZE];
-} systemcfg_store __page_aligned;
+} systemcfg_store __attribute__((__section__(".data.page.aligned")));
struct systemcfg *systemcfg = &systemcfg_store.data;
EXPORT_SYMBOL(systemcfg);
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
index 97bfceb5353..dece31e58bc 100644
--- a/arch/ppc64/kernel/prom.c
+++ b/arch/ppc64/kernel/prom.c
@@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
-static int __init scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data)
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
@@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-static void* __init get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size)
{
unsigned long p = node;
@@ -996,7 +996,7 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
unsigned long size;
@@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
- /* On LPAR, look for the first ibm,pft-size property for the hash table size
- */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
- u32 *pft_size;
- pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (pft_size != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = pft_size[1];
- }
- }
-
if (initial_boot_params && initial_boot_params->version >= 2) {
/* version 2 of the kexec param format adds the phys cpuid
* of booted proc.
@@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid = 0;
} else {
/* Check if it's the boot-cpu, set it's hw index in paca now */
- if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
- u32 *prop = get_flat_dt_prop(node, "reg", NULL);
+ if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL)
+ != NULL) {
+ u32 *prop = of_get_flat_dt_prop(node, "reg", NULL);
set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop);
boot_cpuid_phys = get_hard_smp_processor_id(0);
}
@@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
- prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
- prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
@@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
- prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
systemcfg->platform = *prop;
/* check if iommu is forced on or off */
- if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
- if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
- prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (prop64)
memory_limit = *prop64;
- prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL);
if (prop64)
tce_alloc_start = *prop64;
- prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (prop64)
tce_alloc_end = *prop64;
@@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
- basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL);
- entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL);
- prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL);
+ basep = (u64*)of_get_flat_dt_prop(node,
+ "linux,rtas-base", NULL);
+ entryp = (u64*)of_get_flat_dt_prop(node,
+ "linux,rtas-entry", NULL);
+ prop = (u32*)of_get_flat_dt_prop(node,
+ "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
- prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
- prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
@@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
@@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
+ reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
@@ -1225,19 +1218,16 @@ void __init early_init_devtree(void *params)
/* Setup flat device-tree pointer */
initial_boot_params = params;
- /* By default, hash size is not set */
- ppc64_pft_size = 0;
-
/* Retreive various informations from the /chosen node of the
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- scan_flat_dt(early_init_dt_scan_chosen, NULL);
+ of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
- scan_flat_dt(early_init_dt_scan_root, NULL);
- scan_flat_dt(early_init_dt_scan_memory, NULL);
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
systemcfg->physicalMemorySize = lmb_phys_mem_size();
@@ -1253,26 +1243,8 @@ void __init early_init_devtree(void *params)
/* Retreive hash table size from flattened tree plus other
* CPU related informations (altivec support, boot CPU ID, ...)
*/
- scan_flat_dt(early_init_dt_scan_cpus, NULL);
-
- /* If hash size wasn't obtained above, we calculate it now based on
- * the total RAM size
- */
- if (ppc64_pft_size == 0) {
- unsigned long rnd_mem_size, pteg_count;
-
- /* round mem_size up to next power of 2 */
- rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
- if (rnd_mem_size < systemcfg->physicalMemorySize)
- rnd_mem_size <<= 1;
-
- /* # pages / 2 */
- pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
-
- ppc64_pft_size = __ilog2(pteg_count << 7);
- }
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
- DBG("Hash pftSize: %x\n", (int)ppc64_pft_size);
DBG(" <- early_init_devtree()\n");
}