diff options
62 files changed, 1867 insertions, 368 deletions
diff --git a/Documentation/powerpc/phyp-assisted-dump.txt b/Documentation/powerpc/phyp-assisted-dump.txt new file mode 100644 index 00000000000..c4682b982a2 --- /dev/null +++ b/Documentation/powerpc/phyp-assisted-dump.txt @@ -0,0 +1,127 @@ + + Hypervisor-Assisted Dump + ------------------------ + November 2007 + +The goal of hypervisor-assisted dump is to enable the dump of +a crashed system, and to do so from a fully-reset system, and +to minimize the total elapsed time until the system is back +in production use. + +As compared to kdump or other strategies, hypervisor-assisted +dump offers several strong, practical advantages: + +-- Unlike kdump, the system has been reset, and loaded + with a fresh copy of the kernel. In particular, + PCI and I/O devices have been reinitialized and are + in a clean, consistent state. +-- As the dump is performed, the dumped memory becomes + immediately available to the system for normal use. +-- After the dump is completed, no further reboots are + required; the system will be fully usable, and running + in it's normal, production mode on it normal kernel. + +The above can only be accomplished by coordination with, +and assistance from the hypervisor. The procedure is +as follows: + +-- When a system crashes, the hypervisor will save + the low 256MB of RAM to a previously registered + save region. It will also save system state, system + registers, and hardware PTE's. + +-- After the low 256MB area has been saved, the + hypervisor will reset PCI and other hardware state. + It will *not* clear RAM. It will then launch the + bootloader, as normal. + +-- The freshly booted kernel will notice that there + is a new node (ibm,dump-kernel) in the device tree, + indicating that there is crash data available from + a previous boot. It will boot into only 256MB of RAM, + reserving the rest of system memory. + +-- Userspace tools will parse /sys/kernel/release_region + and read /proc/vmcore to obtain the contents of memory, + which holds the previous crashed kernel. The userspace + tools may copy this info to disk, or network, nas, san, + iscsi, etc. as desired. + + For Example: the values in /sys/kernel/release-region + would look something like this (address-range pairs). + CPU:0x177fee000-0x10000: HPTE:0x177ffe020-0x1000: / + DUMP:0x177fff020-0x10000000, 0x10000000-0x16F1D370A + +-- As the userspace tools complete saving a portion of + dump, they echo an offset and size to + /sys/kernel/release_region to release the reserved + memory back to general use. + + An example of this is: + "echo 0x40000000 0x10000000 > /sys/kernel/release_region" + which will release 256MB at the 1GB boundary. + +Please note that the hypervisor-assisted dump feature +is only available on Power6-based systems with recent +firmware versions. + +Implementation details: +---------------------- + +During boot, a check is made to see if firmware supports +this feature on this particular machine. If it does, then +we check to see if a active dump is waiting for us. If yes +then everything but 256 MB of RAM is reserved during early +boot. This area is released once we collect a dump from user +land scripts that are run. If there is dump data, then +the /sys/kernel/release_region file is created, and +the reserved memory is held. + +If there is no waiting dump data, then only the highest +256MB of the ram is reserved as a scratch area. This area +is *not* released: this region will be kept permanently +reserved, so that it can act as a receptacle for a copy +of the low 256MB in the case a crash does occur. See, +however, "open issues" below, as to whether +such a reserved region is really needed. + +Currently the dump will be copied from /proc/vmcore to a +a new file upon user intervention. The starting address +to be read and the range for each data point in provided +in /sys/kernel/release_region. + +The tools to examine the dump will be same as the ones +used for kdump. + +General notes: +-------------- +Security: please note that there are potential security issues +with any sort of dump mechanism. In particular, plaintext +(unencrypted) data, and possibly passwords, may be present in +the dump data. Userspace tools must take adequate precautions to +preserve security. + +Open issues/ToDo: +------------ + o The various code paths that tell the hypervisor that a crash + occurred, vs. it simply being a normal reboot, should be + reviewed, and possibly clarified/fixed. + + o Instead of using /sys/kernel, should there be a /sys/dump + instead? There is a dump_subsys being created by the s390 code, + perhaps the pseries code should use a similar layout as well. + + o Is reserving a 256MB region really required? The goal of + reserving a 256MB scratch area is to make sure that no + important crash data is clobbered when the hypervisor + save low mem to the scratch area. But, if one could assure + that nothing important is located in some 256MB area, then + it would not need to be reserved. Something that can be + improved in subsequent versions. + + o Still working the kdump team to integrate this with kdump, + some work remains but this would not affect the current + patches. + + o Still need to write a shell script, to copy the dump away. + Currently I am parsing it manually. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1189d8d6170..3651355b6ad 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -91,6 +91,7 @@ config PPC select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_LMB config EARLY_PRINTK bool @@ -307,6 +308,16 @@ config CRASH_DUMP Don't change this unless you know what you are doing. +config PHYP_DUMP + bool "Hypervisor-assisted dump (EXPERIMENTAL)" + depends on PPC_PSERIES && EXPERIMENTAL + help + Hypervisor-assisted dump is meant to be a kdump replacement + offering robustness and speed not possible without system + hypervisor assistence. + + If unsure, say "N" + config PPCBUG_NVRAM bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC default y if PPC_PREP diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 8b810d05644..7a166a39d92 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -333,7 +333,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set +CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set @@ -374,6 +374,7 @@ CONFIG_BLK_DEV_IDEDISK=y CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set CONFIG_IDE_TASK_IOCTL=y CONFIG_IDE_PROC_FS=y @@ -427,10 +428,129 @@ CONFIG_IDE_ARCH_OBSOLETE_INIT=y # SCSI device support # # CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_DMA is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set # CONFIG_SCSI_NETLINK is not set -# CONFIG_ATA is not set +# CONFIG_SCSI_PROC_FS is not set + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +CONFIG_SCSI_IPR=y +CONFIG_SCSI_IPR_TRACE=y +CONFIG_SCSI_IPR_DUMP=y +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +CONFIG_ATA_NONSTANDARD=y +# CONFIG_SATA_AHCI is not set +# CONFIG_SATA_SVW is not set +# CONFIG_ATA_PIIX is not set +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set # CONFIG_MD is not set # CONFIG_FUSION is not set @@ -536,6 +656,7 @@ CONFIG_USB_PEGASUS=y # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set +# CONFIG_NET_FC is not set # CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set @@ -783,12 +904,14 @@ CONFIG_USB_UHCI_HCD=y # # may also be needed; see USB_STORAGE Help for more information # +# CONFIG_USB_STORAGE is not set # CONFIG_USB_LIBUSUAL is not set # # USB Imaging devices # # CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set CONFIG_USB_MON=y # diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4b749c41646..e932b43bd82 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -26,8 +26,6 @@ #ifdef CONFIG_PPC64 #include <linux/time.h> #include <linux/hardirq.h> -#else -#include <linux/ptrace.h> #endif #include <asm/io.h> @@ -60,7 +58,6 @@ int main(void) DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); - DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); @@ -80,7 +77,6 @@ int main(void) DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); - DEFINE(PT_PTRACED, PT_PTRACED); #endif #ifdef CONFIG_SPE DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 80e2eef05b2..9f937774549 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -7,6 +7,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/lmb.h> #include <asm/sections.h> #include <asm/prom.h> @@ -15,7 +16,7 @@ #include <asm/mmu.h> #include <asm/pgtable.h> #include <asm/io.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/processor.h> #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 571132ed12c..eae401de3f7 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -24,12 +24,13 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> +#include <linux/lmb.h> #include <asm/processor.h> #include <asm/machdep.h> #include <asm/kexec.h> #include <asm/kdump.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/firmware.h> #include <asm/smp.h> #include <asm/system.h> diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 29ff77c468a..9ee3c5278db 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,8 +13,9 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> +#include <linux/lmb.h> #include <asm/kdump.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/firmware.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0f4fac51202..c16d1354b19 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -763,23 +763,6 @@ load_up_altivec: b fast_exception_return /* - * AltiVec unavailable trap from kernel - print a message, but let - * the task use AltiVec in the kernel until it returns to user mode. - */ -KernelAltiVec: - lwz r3,_MSR(r1) - oris r3,r3,MSR_VEC@h - stw r3,_MSR(r1) /* enable use of AltiVec after return */ - lis r3,87f@h - ori r3,r3,87f@l - mr r4,r2 /* current */ - lwz r5,_NIP(r1) - bl printk - b ret_from_except -87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" - .align 4,0 - -/* * giveup_altivec(tsk) * Disable AltiVec for the task given as the argument, * and save the AltiVec registers in its thread_struct. diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index c0c8e8c3ced..2d202f274e7 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -12,8 +12,9 @@ #include <linux/kexec.h> #include <linux/reboot.h> #include <linux/threads.h> +#include <linux/lmb.h> #include <asm/machdep.h> -#include <asm/lmb.h> +#include <asm/prom.h> void machine_crash_shutdown(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 9c98424277a..a722ede726d 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -58,7 +58,6 @@ extern void program_check_exception(struct pt_regs *regs); extern void single_step_exception(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); -EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(clear_pages); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); @@ -192,3 +191,4 @@ EXPORT_SYMBOL(intercept_table); EXPORT_SYMBOL(__mtdcr); EXPORT_SYMBOL(__mfdcr); #endif +EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59311ec0d42..8d506d86e2d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -868,11 +868,6 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_spe_to_thread(current); error = do_execve(filename, (char __user * __user *) a1, (char __user * __user *) a2, regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - } putname(filename); out: return error; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index eac97f48b9b..9330920265f 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -31,10 +31,10 @@ #include <linux/kexec.h> #include <linux/debugfs.h> #include <linux/irq.h> +#include <linux/lmb.h> #include <asm/prom.h> #include <asm/rtas.h> -#include <asm/lmb.h> #include <asm/page.h> #include <asm/processor.h> #include <asm/irq.h> @@ -51,6 +51,7 @@ #include <asm/machdep.h> #include <asm/pSeries_reconfig.h> #include <asm/pci-bridge.h> +#include <asm/phyp_dump.h> #include <asm/kexec.h> #ifdef DEBUG @@ -1040,6 +1041,56 @@ static void __init early_reserve_mem(void) #endif } +#ifdef CONFIG_PHYP_DUMP +/** + * phyp_dump_reserve_mem() - reserve all not-yet-dumped mmemory + * + * This routine may reserve memory regions in the kernel only + * if the system is supported and a dump was taken in last + * boot instance or if the hardware is supported and the + * scratch area needs to be setup. In other instances it returns + * without reserving anything. The memory in case of dump being + * active is freed when the dump is collected (by userland tools). + */ +static void __init phyp_dump_reserve_mem(void) +{ + unsigned long base, size; + if (!phyp_dump_info->phyp_dump_configured) { + printk(KERN_ERR "Phyp-dump not supported on this hardware\n"); + return; + } + + if (!phyp_dump_info->phyp_dump_at_boot) { + printk(KERN_INFO "Phyp-dump disabled at boot time\n"); + return; + } + + if (phyp_dump_info->phyp_dump_is_active) { + /* Reserve *everything* above RMR.Area freed by userland tools*/ + base = PHYP_DUMP_RMR_END; + size = lmb_end_of_DRAM() - base; + + /* XXX crashed_ram_end is wrong, since it may be beyond + * the memory_limit, it will need to be adjusted. */ + lmb_reserve(base, size); + + phyp_dump_info->init_reserve_start = base; + phyp_dump_info->init_reserve_size = size; + } else { + size = phyp_dump_info->cpu_state_size + + phyp_dump_info->hpte_region_size + + PHYP_DUMP_RMR_END; + base = lmb_end_of_DRAM() - size; + lmb_reserve(base, size); + phyp_dump_info->init_reserve_start = base; + phyp_dump_info->init_reserve_size = size; + } +} +#else +static inline void __init phyp_dump_reserve_mem(void) {} +#endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ + + void __init early_init_devtree(void *params) { DBG(" -> early_init_devtree(%p)\n", params); @@ -1052,6 +1103,11 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_rtas, NULL); #endif +#ifdef CONFIG_PHYP_DUMP + /* scan tree to see if dump occured during last boot */ + of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); +#endif + /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... @@ -1072,6 +1128,7 @@ void __init early_init_devtree(void *params) reserve_kdump_trampoline(); reserve_crashkernel(); early_reserve_mem(); + phyp_dump_reserve_mem(); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 52e95c2158c..e2e78d967f3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -22,6 +22,7 @@ #include <linux/smp.h> #include <linux/completion.h> #include <linux/cpumask.h> +#include <linux/lmb.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -34,7 +35,6 @@ #include <asm/system.h> #include <asm/delay.h> #include <asm/uaccess.h> -#include <asm/lmb.h> #include <asm/udbg.h> #include <asm/syscalls.h> #include <asm/smp.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 6adb5a1e98b..db540eab09f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -34,6 +34,7 @@ #include <linux/serial_8250.h> #include <linux/debugfs.h> #include <linux/percpu.h> +#include <linux/lmb.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/processor.h> @@ -56,7 +57,6 @@ #include <asm/cache.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/lmb.h> #include <asm/xmon.h> #include <asm/cputhreads.h> @@ -167,6 +167,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short min; if (cpu_id == NR_CPUS) { + struct device_node *root; + const char *model = NULL; #if defined(CONFIG_SMP) && defined(CONFIG_PPC32) unsigned long bogosum = 0; int i; @@ -178,6 +180,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); if (ppc_md.name) seq_printf(m, "platform\t: %s\n", ppc_md.name); + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + if (model) + seq_printf(m, "model\t\t: %s\n", model); + of_node_put(root); + if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 3b1529c103e..2c2d8315193 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -33,6 +33,7 @@ #include <linux/serial_8250.h> #include <linux/bootmem.h> #include <linux/pci.h> +#include <linux/lmb.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -55,7 +56,6 @@ #include <asm/cache.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/lmb.h> #include <asm/firmware.h> #include <asm/xmon.h> #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 4a4f5c6b560..9c3371e6958 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -368,11 +368,6 @@ long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - } putname(filename); out: diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d3437c4c4a6..c21a626af67 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -21,13 +21,14 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/bootmem.h> +#include <linux/lmb.h> #include <asm/pgtable.h> #include <asm/system.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/sections.h> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 41649a5d360..1c00e0196f6 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif -obj-y := fault.o mem.o lmb.o \ +obj-y := fault.o mem.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o \ mmu_context_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a83dfa3cf40..c2e5f61788b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -31,6 +31,7 @@ #include <linux/cache.h> #include <linux/init.h> #include <linux/signal.h> +#include <linux/lmb.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -41,7 +42,7 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/machdep.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/abs_addr.h> #include <asm/tlbflush.h> #include <asm/io.h> @@ -191,6 +192,24 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, return ret < 0 ? ret : 0; } +static void htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize) +{ + unsigned long vaddr; + unsigned int step, shift; + + shift = mmu_psize_defs[psize].shift; + step = 1 << shift; + + if (!ppc_md.hpte_removebolted) { + printk("Sub-arch doesn't implement hpte_removebolted\n"); + return; + } + + for (vaddr = vstart; vaddr < vend; vaddr += step) + ppc_md.hpte_removebolted(vaddr, psize, ssize); +} + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -434,6 +453,11 @@ void create_section_mapping(unsigned long start, unsigned long end) _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX, mmu_linear_psize, mmu_kernel_ssize)); } + +void remove_section_mapping(unsigned long start, unsigned long end) +{ + htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); +} #endif /* CONFIG_MEMORY_HOTPLUG */ static inline void make_bl(unsigned int *insn_addr, void *func) diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 977cb1ee5e7..59a725b8ece 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -30,6 +30,7 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> +#include <linux/lmb.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -41,7 +42,6 @@ #include <asm/machdep.h> #include <asm/btext.h> #include <asm/tlb.h> -#include <asm/lmb.h> #include <asm/sections.h> #include "mmu_decl.h" diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c0f5cff7703..abeb0eb7931 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -38,11 +38,11 @@ #include <linux/nodemask.h> #include <linux/module.h> #include <linux/poison.h> +#include <linux/lmb.h> #include <asm/pgalloc.h> #include <asm/page.h> #include <asm/prom.h> -#include <asm/lmb.h> #include <asm/rtas.h> #include <asm/io.h> #include <asm/mmu_context.h> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be5c506779a..60c019cdc69 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/suspend.h> +#include <linux/lmb.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -42,7 +43,6 @@ #include <asm/machdep.h> #include <asm/btext.h> #include <asm/tlb.h> -#include <asm/lmb.h> #include <asm/sections.h> #include <asm/vdso.h> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a300d254aac..1efd631211e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -17,8 +17,9 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/lmb.h> #include <asm/sparsemem.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/system.h> #include <asm/smp.h> diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 5c45d474cfc..72de3c79210 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -26,11 +26,11 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/highmem.h> +#include <linux/lmb.h> #include <asm/prom.h> #include <asm/mmu.h> #include <asm/machdep.h> -#include <asm/lmb.h> #include "mmu_decl.h" diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 50448d5de9d..efbbd13d93e 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -12,12 +12,14 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/lmb.h> + #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> #include <asm/cputable.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/abs_addr.h> #include <asm/firmware.h> #include <asm/iseries/hv_call.h> diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 184f998d1be..0d9f75c74f8 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -111,17 +111,12 @@ void __init mpc8xx_calibrate_decr(void) /* Processor frequency is MHz. */ - ppc_tb_freq = 50000000; - if (!get_freq("bus-frequency", &ppc_tb_freq)) { - printk(KERN_ERR "WARNING: Estimating decrementer frequency " - "(not found)\n"); - } - ppc_tb_freq /= 16; ppc_proc_freq = 50000000; if (!get_freq("clock-frequency", &ppc_proc_freq)) printk(KERN_ERR "WARNING: Estimating processor frequency " "(not found)\n"); + ppc_tb_freq = ppc_proc_freq / 16; printk("Decrementer Frequency = 0x%lx\n", ppc_tb_freq); /* Perform some more timer/timebase initialization. This used diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index d75ccded7f1..49fe641d434 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -28,13 +28,13 @@ #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_platform.h> +#include <linux/lmb.h> #include <asm/prom.h> #include <asm/iommu.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> #include <asm/udbg.h> -#include <asm/lmb.h> #include <asm/firmware.h> #include <asm/cell-regs.h> diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c index cc562e4c2f3..02a634faedb 100644 --- a/arch/powerpc/platforms/iseries/pci.c +++ b/arch/powerpc/platforms/iseries/pci.c @@ -23,6 +23,7 @@ #undef DEBUG +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/string.h> @@ -586,7 +587,7 @@ static inline struct device_node *xlate_iomm_address( static unsigned long last_jiffies; static int num_printed; - if ((jiffies - last_jiffies) > 60 * HZ) { + if (time_after(jiffies, last_jiffies + 60 * HZ)) { last_jiffies = jiffies; num_printed = 0; } diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 3ffa0ac170e..301855263b8 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -592,50 +592,3 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) } return irq; } - -/* XXX: To remove once all firmwares are ok */ -static void fixup_maple_ide(struct pci_dev* dev) -{ - if (!machine_is(maple)) - return; - -#if 0 /* Enable this to enable IDE port 0 */ - { - u8 v; - - pci_read_config_byte(dev, 0x40, &v); - v |= 2; - pci_write_config_byte(dev, 0x40, v); - } -#endif -#if 0 /* fix bus master base */ - pci_write_config_dword(dev, 0x20, 0xcc01); - printk("old ide resource: %lx -> %lx \n", - dev->resource[4].start, dev->resource[4].end); - dev->resource[4].start = 0xcc00; - dev->resource[4].end = 0xcc10; -#endif -#if 0 /* Enable this to fixup IDE sense/polarity of irqs in IO-APICs */ - { - struct pci_dev *apicdev; - u32 v; - - apicdev = pci_get_slot (dev->bus, PCI_DEVFN(5,0)); - if (apicdev == NULL) - printk("IDE Fixup IRQ: Can't find IO-APIC !\n"); - else { - pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*14); - pci_read_config_dword(apicdev, 0xf4, &v); - v &= ~0x00000022; - pci_write_config_dword(apicdev, 0xf4, v); - pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*15); - pci_read_config_dword(apicdev, 0xf4, &v); - v &= ~0x00000022; - pci_write_config_dword(apicdev, 0xf4, v); - pci_dev_put(apicdev); - } - } -#endif -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_IDE, - fixup_maple_ide); diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 3ce2d73b417..364714757cf 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -43,6 +43,7 @@ #include <linux/smp.h> #include <linux/bitops.h> #include <linux/of_device.h> +#include <linux/lmb.h> #include <asm/processor.h> #include <asm/sections.h> @@ -57,7 +58,6 @@ #include <asm/dma.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/lmb.h> #include <asm/mpic.h> #include <asm/rtas.h> #include <asm/udbg.h> @@ -319,7 +319,7 @@ static int __init maple_probe(void) return 1; } -define_machine(maple_md) { +define_machine(maple) { .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index c529d8dff39..217af321b0c 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/pci.h> @@ -26,6 +27,8 @@ #define MAX_TXCH 64 #define MAX_RXCH 64 +#define MAX_FLAGS 64 +#define MAX_FUN 8 static struct pasdma_status *dma_status; @@ -43,6 +46,8 @@ static struct pci_dev *dma_pdev; static DECLARE_BITMAP(txch_free, MAX_TXCH); static DECLARE_BITMAP(rxch_free, MAX_RXCH); +static DECLARE_BITMAP(flags_free, MAX_FLAGS); +static DECLARE_BITMAP(fun_free, MAX_FUN); /* pasemi_read_iob_reg - read IOB register * @reg: Register to read (offset into PCI CFG space) @@ -373,6 +378,106 @@ void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size, } EXPORT_SYMBOL(pasemi_dma_free_buf); +/* pasemi_dma_alloc_flag - Allocate a flag (event) for channel syncronization + * + * Allocates a flag for use with channel syncronization (event descriptors). + * Returns allocated flag (0-63), < 0 on error. + */ +int pasemi_dma_alloc_flag(void) +{ + int bit; + +retry: + bit = find_next_bit(flags_free, MAX_FLAGS, 0); + if (bit >= MAX_FLAGS) + return -ENOSPC; + if (!test_and_clear_bit(bit, flags_free)) + goto retry; + + return bit; +} +EXPORT_SYMBOL(pasemi_dma_alloc_flag); + + +/* pasemi_dma_free_flag - Deallocates a flag (event) + * @flag: Flag number to deallocate + * + * Frees up a flag so it can be reused for other purposes. + */ +void pasemi_dma_free_flag(int flag) +{ + BUG_ON(test_bit(flag, flags_free)); + BUG_ON(flag >= MAX_FLAGS); + set_bit(flag, flags_free); +} +EXPORT_SYMBOL(pasemi_dma_free_flag); + + +/* pasemi_dma_set_flag - Sets a flag (event) to 1 + * @flag: Flag number to set active + * + * Sets the flag provided to 1. + */ +void pasemi_dma_set_flag(int flag) +{ + BUG_ON(flag >= MAX_FLAGS); + if (flag < 32) + pasemi_write_dma_reg(PAS_DMA_TXF_SFLG0, 1 << flag); + else + pasemi_write_dma_reg(PAS_DMA_TXF_SFLG1, 1 << flag); +} +EXPORT_SYMBOL(pasemi_dma_set_flag); + +/* pasemi_dma_clear_flag - Sets a flag (event) to 0 + * @flag: Flag number to set inactive + * + * Sets the flag provided to 0. + */ +void pasemi_dma_clear_flag(int flag) +{ + BUG_ON(flag >= MAX_FLAGS); + if (flag < 32) + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 1 << flag); + else + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 1 << flag); +} +EXPORT_SYMBOL(pasemi_dma_clear_flag); + +/* pasemi_dma_alloc_fun - Allocate a function engine + * + * Allocates a function engine to use for crypto/checksum offload + * Returns allocated engine (0-8), < 0 on error. + */ +int pasemi_dma_alloc_fun(void) +{ + int bit; + +retry: + bit = find_next_bit(fun_free, MAX_FLAGS, 0); + if (bit >= MAX_FLAGS) + return -ENOSPC; + if (!test_and_clear_bit(bit, fun_free)) + goto retry; + + return bit; +} +EXPORT_SYMBOL(pasemi_dma_alloc_fun); + + +/* pasemi_dma_free_fun - Deallocates a function engine + * @flag: Engine number to deallocate + * + * Frees up a function engine so it can be used for other purposes. + */ +void pasemi_dma_free_fun(int fun) +{ + BUG_ON(test_bit(fun, fun_free)); + BUG_ON(fun >= MAX_FLAGS); + set_bit(fun, fun_free); +} +EXPORT_SYMBOL(pasemi_dma_free_fun); + + static void *map_onedev(struct pci_dev *p, int index) { struct device_node *dn; @@ -410,6 +515,7 @@ int pasemi_dma_init(void) struct resource res; struct device_node *dn; int i, intf, err = 0; + unsigned long timeout; u32 tmp; if (!machine_is(pasemi)) @@ -478,6 +584,44 @@ int pasemi_dma_init(void) for (i = 0; i < MAX_RXCH; i++) __set_bit(i, rxch_free); + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warning("Warning: Could not disable RX section\n"); + break; + } + } + + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warning("Warning: Could not disable TX section\n"); + break; + } + } + + /* setup resource allocations for the different DMA sections */ + tmp = pasemi_read_dma_reg(PAS_DMA_COM_CFG); + pasemi_write_dma_reg(PAS_DMA_COM_CFG, tmp | 0x18000000); + + /* enable tx section */ + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN); + + /* enable rx section */ + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN); + + for (i = 0; i < MAX_FLAGS; i++) + __set_bit(i, flags_free); + + for (i = 0; i < MAX_FUN; i++) + __set_bit(i, fun_free); + + /* clear all status flags */ + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff); + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff); + printk(KERN_INFO "PA Semi PWRficient DMA library initialized " "(%d tx, %d rx channels)\n", num_txch, num_rxch); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 36ff1b6b7fa..59404baf911 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -53,6 +53,7 @@ #include <linux/suspend.h> #include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/lmb.h> #include <asm/reg.h> #include <asm/sections.h> @@ -74,7 +75,6 @@ #include <asm/iommu.h> #include <asm/smu.h> #include <asm/pmc.h> -#include <asm/lmb.h> #include <asm/udbg.h> #include "pmac.h" diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 7382f195c4f..1cf901fa903 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -19,9 +19,10 @@ */ #include <linux/kernel.h> +#include <linux/lmb.h> #include <asm/machdep.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/udbg.h> #include <asm/lv1call.h> #include <asm/ps3fb.h> diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 68900476c84..5b3fb2b321a 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -21,9 +21,10 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/memory_hotplug.h> +#include <linux/lmb.h> #include <asm/firmware.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include <asm/udbg.h> #include <asm/lv1call.h> diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index b9ea09d9d2f..c73379ec914 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -24,8 +24,9 @@ #include <linux/fs.h> #include <linux/syscalls.h> #include <linux/ctype.h> +#include <linux/lmb.h> -#include <asm/lmb.h> +#include <asm/prom.h> #include "platform.h" diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 992ba6753cf..bdae04bb7a0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o +obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 9a455d46379..233d9be25f4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -520,6 +520,20 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, BUG_ON(lpar_rc != H_SUCCESS); } +static void pSeries_lpar_hpte_removebolted(unsigned long ea, + int psize, int ssize) +{ + unsigned long slot, vsid, va; + + vsid = get_kernel_vsid(ea, ssize); + va = hpt_va(ea, vsid, ssize); + + slot = pSeries_lpar_hpte_find(va, psize, ssize); + BUG_ON(slot == -1); + + pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0); +} + /* Flag bits for H_BULK_REMOVE */ #define HBR_REQUEST 0x4000000000000000UL #define HBR_RESPONSE 0x8000000000000000UL @@ -597,6 +611,7 @@ void __init hpte_init_lpar(void) ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; ppc_md.hpte_insert = pSeries_lpar_hpte_insert; ppc_md.hpte_remove = pSeries_lpar_hpte_remove; + ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; } diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c new file mode 100644 index 00000000000..7ddd10526ce --- /dev/null +++ b/arch/powerpc/platforms/pseries/phyp_dump.c @@ -0,0 +1,498 @@ +/* + * Hypervisor-assisted dump + * + * Linas Vepstas, Manish Ahuja 2008 + * Copyright 2008 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/pfn.h> +#include <linux/swap.h> +#include <linux/sysfs.h> + +#include <asm/page.h> +#include <asm/phyp_dump.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/rtas.h> + +/* Variables, used to communicate data between early boot and late boot */ +static struct phyp_dump phyp_dump_vars; +struct phyp_dump *phyp_dump_info = &phyp_dump_vars; + +static int ibm_configure_kernel_dump; +/* ------------------------------------------------- */ +/* RTAS interfaces to declare the dump regions */ + +struct dump_section { + u32 dump_flags; + u16 source_type; + u16 error_flags; + u64 source_address; + u64 source_length; + u64 length_copied; + u64 destination_address; +}; + +struct phyp_dump_header { + u32 version; + u16 num_of_sections; + u16 status; + + u32 first_offset_section; + u32 dump_disk_section; + u64 block_num_dd; + u64 num_of_blocks_dd; + u32 offset_dd; + u32 maxtime_to_auto; + /* No dump disk path string used */ + + struct dump_section cpu_data; + struct dump_section hpte_data; + struct dump_section kernel_data; +}; + +/* The dump header *must be* in low memory, so .bss it */ +static struct phyp_dump_header phdr; + +#define NUM_DUMP_SECTIONS 3 +#define DUMP_HEADER_VERSION 0x1 +#define DUMP_REQUEST_FLAG 0x1 +#define DUMP_SOURCE_CPU 0x0001 +#define DUMP_SOURCE_HPTE 0x0002 +#define DUMP_SOURCE_RMO 0x0011 +#define DUMP_ERROR_FLAG 0x2000 +#define DUMP_TRIGGERED 0x4000 +#define DUMP_PERFORMED 0x8000 + + +/** + * init_dump_header() - initialize the header declaring a dump + * Returns: length of dump save area. + * + * When the hypervisor saves crashed state, it needs to put + * it somewhere. The dump header tells the hypervisor where + * the data can be saved. + */ +static unsigned long init_dump_header(struct phyp_dump_header *ph) +{ + unsigned long addr_offset = 0; + + /* Set up the dump header */ + ph->version = DUMP_HEADER_VERSION; + ph->num_of_sections = NUM_DUMP_SECTIONS; + ph->status = 0; + + ph->first_offset_section = + (u32)offsetof(struct phyp_dump_header, cpu_data); + ph->dump_disk_section = 0; + ph->block_num_dd = 0; + ph->num_of_blocks_dd = 0; + ph->offset_dd = 0; + + ph->maxtime_to_auto = 0; /* disabled */ + + /* The first two sections are mandatory */ + ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG; + ph->cpu_data.source_type = DUMP_SOURCE_CPU; + ph->cpu_data.source_address = 0; + ph->cpu_data.source_length = phyp_dump_info->cpu_state_size; + ph->cpu_data.destination_address = addr_offset; + addr_offset += phyp_dump_info->cpu_state_size; + + ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG; + ph->hpte_data.source_type = DUMP_SOURCE_HPTE; + ph->hpte_data.source_address = 0; + ph->hpte_data.source_length = phyp_dump_info->hpte_region_size; + ph->hpte_data.destination_address = addr_offset; + addr_offset += phyp_dump_info->hpte_region_size; + + /* This section describes the low kernel region */ + ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG; + ph->kernel_data.source_type = DUMP_SOURCE_RMO; + ph->kernel_data.source_address = PHYP_DUMP_RMR_START; + ph->kernel_data.source_length = PHYP_DUMP_RMR_END; + ph->kernel_data.destination_address = addr_offset; + addr_offset += ph->kernel_data.source_length; + + return addr_offset; +} + +static void print_dump_header(const struct phyp_dump_header *ph) +{ +#ifdef DEBUG + printk(KERN_INFO "dump header:\n"); + /* setup some ph->sections required */ + printk(KERN_INFO "version = %d\n", ph->version); + printk(KERN_INFO "Sections = %d\n", ph->num_of_sections); + printk(KERN_INFO "Status = 0x%x\n", ph->status); + + /* No ph->disk, so all should be set to 0 */ + printk(KERN_INFO "Offset to first section 0x%x\n", + ph->first_offset_section); + printk(KERN_INFO "dump disk sections should be zero\n"); + printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section); + printk(KERN_INFO "block num = %ld\n", ph->block_num_dd); + printk(KERN_INFO "number of blocks = %ld\n", ph->num_of_blocks_dd); + printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd); + printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto); + + /*set cpu state and hpte states as well scratch pad area */ + printk(KERN_INFO " CPU AREA \n"); + printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags); + printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type); + printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags); + printk(KERN_INFO "cpu source_address =%lx\n", + ph->cpu_data.source_address); + printk(KERN_INFO "cpu source_length =%lx\n", + ph->cpu_data.source_length); + printk(KERN_INFO "cpu length_copied =%lx\n", + ph->cpu_data.length_copied); + + printk(KERN_INFO " HPTE AREA \n"); + printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags); + printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type); + printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags); + printk(KERN_INFO "HPTE source_address =%lx\n", + ph->hpte_data.source_address); + printk(KERN_INFO "HPTE source_length =%lx\n", + ph->hpte_data.source_length); + printk(KERN_INFO "HPTE length_copied =%lx\n", + ph->hpte_data.length_copied); + + printk(KERN_INFO " SRSD AREA \n"); + printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags); + printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type); + printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags); + printk(KERN_INFO "SRSD source_address =%lx\n", + ph->kernel_data.source_address); + printk(KERN_INFO "SRSD source_length =%lx\n", + ph->kernel_data.source_length); + printk(KERN_INFO "SRSD length_copied =%lx\n", + ph->kernel_data.length_copied); +#endif +} + +static ssize_t show_phyp_dump_active(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + + /* create filesystem entry so kdump is phyp-dump aware */ + return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot); +} + +static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, + show_phyp_dump_active, + NULL); + +static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr) +{ + int rc; + + /* Add addr value if not initialized before */ + if (ph->cpu_data.destination_address == 0) { + ph->cpu_data.destination_address += addr; + ph->hpte_data.destination_address += addr; + ph->kernel_data.destination_address += addr; + } + + /* ToDo Invalidate kdump and free memory range. */ + + do { + rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, + 1, ph, sizeof(struct phyp_dump_header)); + } while (rtas_busy_delay(rc)); + + if (rc) { + printk(KERN_ERR "phyp-dump: unexpected error (%d) on " + "register\n", rc); + print_dump_header(ph); + return; + } + + rc = sysfs_create_file(kernel_kobj, &pdl.attr); + if (rc) + printk(KERN_ERR "phyp-dump: unable to create sysfs" + " file (%d)\n", rc); +} + +static +void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) +{ + int rc; + + /* Add addr value if not initialized before */ + if (ph->cpu_data.destination_address == 0) { + ph->cpu_data.destination_address += addr; + ph->hpte_data.destination_address += addr; + ph->kernel_data.destination_address += addr; + } + + do { + rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, + 2, ph, sizeof(struct phyp_dump_header)); + } while (rtas_busy_delay(rc)); + + if (rc) { + printk(KERN_ERR "phyp-dump: unexpected error (%d) " + "on invalidate\n", rc); + print_dump_header(ph); + } +} + +/* ------------------------------------------------- */ +/** + * release_memory_range -- release memory previously lmb_reserved + * @start_pfn: starting physical frame number + * @nr_pages: number of pages to free. + * + * This routine will release memory that had been previously + * lmb_reserved in early boot. The released memory becomes + * available for genreal use. + */ +static void release_memory_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + struct page *rpage; + unsigned long end_pfn; + long i; + + end_pfn = start_pfn + nr_pages; + + for (i = start_pfn; i <= end_pfn; i++) { + rpage = pfn_to_page(i); + if (PageReserved(rpage)) { + ClearPageReserved(rpage); + init_page_count(rpage); + __free_page(rpage); + totalram_pages++; + } + } +} + +/** + * track_freed_range -- Counts the range being freed. + * Once the counter goes to zero, it re-registers dump for + * future use. + */ +static void +track_freed_range(unsigned long addr, unsigned long length) +{ + static unsigned long scratch_area_size, reserved_area_size; + + if (addr < phyp_dump_info->init_reserve_start) + return; + + if ((addr >= phyp_dump_info->init_reserve_start) && + (addr <= phyp_dump_info->init_reserve_start + + phyp_dump_info->init_reserve_size)) + reserved_area_size += length; + + if ((addr >= phyp_dump_info->reserved_scratch_addr) && + (addr <= phyp_dump_info->reserved_scratch_addr + + phyp_dump_info->reserved_scratch_size)) + scratch_area_size += length; + + if ((reserved_area_size == phyp_dump_info->init_reserve_size) && + (scratch_area_size == phyp_dump_info->reserved_scratch_size)) { + + invalidate_last_dump(&phdr, + phyp_dump_info->reserved_scratch_addr); + register_dump_area(&phdr, + phyp_dump_info->reserved_scratch_addr); + } +} + +/* ------------------------------------------------- */ +/** + * sysfs_release_region -- sysfs interface to release memory range. + * + * Usage: + * "echo <start addr> <length> > /sys/kernel/release_region" + * + * Example: + * "echo 0x40000000 0x10000000 > /sys/kernel/release_region" + * + * will release 256MB starting at 1GB. + */ +static ssize_t store_release_region(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long start_addr, length, end_addr; + unsigned long start_pfn, nr_pages; + ssize_t ret; + + ret = sscanf(buf, "%lx %lx", &start_addr, &length); + if (ret != 2) + return -EINVAL; + + track_freed_range(start_addr, length); + + /* Range-check - don't free any reserved memory that + * wasn't reserved for phyp-dump */ + if (start_addr < phyp_dump_info->init_reserve_start) + start_addr = phyp_dump_info->init_reserve_start; + + end_addr = phyp_dump_info->init_reserve_start + + phyp_dump_info->init_reserve_size; + if (start_addr+length > end_addr) + length = end_addr - start_addr; + + /* Release the region of memory assed in by user */ + start_pfn = PFN_DOWN(start_addr); + nr_pages = PFN_DOWN(length); + release_memory_range(start_pfn, nr_pages); + + return count; +} + +static ssize_t show_release_region(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + u64 second_addr_range; + + /* total reserved size - start of scratch area */ + second_addr_range = phyp_dump_info->init_reserve_size - + phyp_dump_info->reserved_scratch_size; + return sprintf(buf, "CPU:0x%lx-0x%lx: HPTE:0x%lx-0x%lx:" + " DUMP:0x%lx-0x%lx, 0x%lx-0x%lx:\n", + phdr.cpu_data.destination_address, + phdr.cpu_data.length_copied, + phdr.hpte_data.destination_address, + phdr.hpte_data.length_copied, + phdr.kernel_data.destination_address, + phdr.kernel_data.length_copied, + phyp_dump_info->init_reserve_start, + second_addr_range); +} + +static struct kobj_attribute rr = __ATTR(release_region, 0600, + show_release_region, + store_release_region); + +static int __init phyp_dump_setup(void) +{ + struct device_node *rtas; + const struct phyp_dump_header *dump_header = NULL; + unsigned long dump_area_start; + unsigned long dump_area_length; + int header_len = 0; + int rc; + + /* If no memory was reserved in early boot, there is nothing to do */ + if (phyp_dump_info->init_reserve_size == 0) + return 0; + + /* Return if phyp dump not supported */ + if (!phyp_dump_info->phyp_dump_configured) + return -ENOSYS; + + /* Is there dump data waiting for us? If there isn't, + * then register a new dump area, and release all of + * the rest of the reserved ram. + * + * The /rtas/ibm,kernel-dump rtas node is present only + * if there is dump data waiting for us. + */ + rtas = of_find_node_by_path("/rtas"); + if (rtas) { + dump_header = of_get_property(rtas, "ibm,kernel-dump", + &header_len); + of_node_put(rtas); + } + + print_dump_header(dump_header); + dump_area_length = init_dump_header(&phdr); + /* align down */ + dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK; + + if (dump_header == NULL) { + register_dump_area(&phdr, dump_area_start); + return 0; + } + + /* re-register the dump area, if old dump was invalid */ + if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) { + invalidate_last_dump(&phdr, dump_area_start); + register_dump_area(&phdr, dump_area_start); + return 0; + } + + if (dump_header) { + phyp_dump_info->reserved_scratch_addr = + dump_header->cpu_data.destination_address; + phyp_dump_info->reserved_scratch_size = + dump_header->cpu_data.source_length + + dump_header->hpte_data.source_length + + dump_header->kernel_data.source_length; + } + + /* Should we create a dump_subsys, analogous to s390/ipl.c ? */ + rc = sysfs_create_file(kernel_kobj, &rr.attr); + if (rc) + printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n", + rc); + + /* ToDo: re-register the dump area, for next time. */ + return 0; +} +machine_subsys_initcall(pseries, phyp_dump_setup); + +int __init early_init_dt_scan_phyp_dump(unsigned long node, + const char *uname, int depth, void *data) +{ + const unsigned int *sizes; + + phyp_dump_info->phyp_dump_configured = 0; + phyp_dump_info->phyp_dump_is_active = 0; + + if (depth != 1 || strcmp(uname, "rtas") != 0) + return 0; + + if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL)) + phyp_dump_info->phyp_dump_configured++; + + if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL)) + phyp_dump_info->phyp_dump_is_active++; + + sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", + NULL); + if (!sizes) + return 0; + + if (sizes[0] == 1) + phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]); + + if (sizes[3] == 2) + phyp_dump_info->hpte_region_size = + *((unsigned long *)&sizes[4]); + return 1; +} + +/* Look for phyp_dump= cmdline option */ +static int __init early_phyp_dump_enabled(char *p) +{ + phyp_dump_info->phyp_dump_at_boot = 1; + + if (!p) + return 0; + + if (strncmp(p, "1", 1) == 0) + phyp_dump_info->phyp_dump_at_boot = 1; + else if (strncmp(p, "0", 1) == 0) + phyp_dump_info->phyp_dump_at_boot = 0; + + return 0; +} +early_param("phyp_dump", early_phyp_dump_enabled); + diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 8e1ef168e2d..e5b0ea87016 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -195,31 +195,30 @@ const struct file_operations scanlog_fops = { static int __init scanlog_init(void) { struct proc_dir_entry *ent; + void *data; + int err = -ENOMEM; ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "scan-log-dump not implemented on this system\n"); - return -EIO; - } + if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) + return -ENODEV; - ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL); - if (ent) { - ent->proc_fops = &scanlog_fops; - /* Ideally we could allocate a buffer < 4G */ - ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!ent->data) { - printk(KERN_ERR "Failed to allocate a buffer\n"); - remove_proc_entry("scan-log-dump", ent->parent); - return -ENOMEM; - } - ((unsigned int *)ent->data)[0] = 0; - } else { - printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n"); - return -EIO; - } + /* Ideally we could allocate a buffer < 4G */ + data = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!data) + goto err; + + ent = proc_create("ppc64/rtas/scan-log-dump", S_IRUSR, NULL, + &scanlog_fops); + if (!ent) + goto err; + + ent->data = data; proc_ppc64_scan_log_dump = ent; return 0; +err: + kfree(data); + return err; } static void __exit scanlog_cleanup(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index fdb9b1c8f97..90555a39fe6 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -393,6 +393,7 @@ static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); unsigned long start_snooze; + unsigned long in_purr, out_purr; /* * Indicate to the HV that we are idle. Now would be @@ -400,6 +401,7 @@ static void pseries_dedicated_idle_sleep(void) */ get_lppaca()->idle = 1; get_lppaca()->donate_dedicated_cpu = 1; + in_purr = mfspr(SPRN_PURR); /* * We come in with interrupts disabled, and need_resched() @@ -432,6 +434,8 @@ static void pseries_dedicated_idle_sleep(void) out: HMT_medium(); + out_purr = mfspr(SPRN_PURR); + get_lppaca()->wait_state_cycles += out_purr - in_purr; get_lppaca()->donate_dedicated_cpu = 0; get_lppaca()->idle = 0; } diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index e0e24b01e3a..005c2ecf976 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -37,6 +37,7 @@ #include <linux/dma-mapping.h> #include <linux/vmalloc.h> #include <linux/suspend.h> +#include <linux/lmb.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -44,7 +45,6 @@ #include <asm/machdep.h> #include <asm/abs_addr.h> #include <asm/cacheflush.h> -#include <asm/lmb.h> #include <asm/ppc-pci.h> #include "dart.h" diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 6ffdda244bb..6131fd2b661 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -175,13 +175,16 @@ static inline void _mpic_write(enum mpic_reg_type type, switch(type) { #ifdef CONFIG_PPC_DCR case mpic_access_dcr: - return dcr_write(rb->dhost, reg, value); + dcr_write(rb->dhost, reg, value); + break; #endif case mpic_access_mmio_be: - return out_be32(rb->base + (reg >> 2), value); + out_be32(rb->base + (reg >> 2), value); + break; case mpic_access_mmio_le: default: - return out_le32(rb->base + (reg >> 2), value); + out_le32(rb->base + (reg >> 2), value); + break; } } @@ -1000,7 +1003,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, const char *name) { struct mpic *mpic; - u32 reg; + u32 greg_feature; const char *vers; int i; int intvec_top; @@ -1064,7 +1067,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* Look for protected sources */ if (node) { - unsigned int psize, bits, mapsize; + int psize; + unsigned int bits, mapsize; const u32 *psrc = of_get_property(node, "protected-sources", &psize); if (psrc) { @@ -1107,8 +1111,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, * in, try to obtain one */ if (paddr == 0 && !(mpic->flags & MPIC_USES_DCR)) { - const u32 *reg; - reg = of_get_property(node, "reg", NULL); + const u32 *reg = of_get_property(node, "reg", NULL); BUG_ON(reg == NULL); paddr = of_translate_address(node, reg); BUG_ON(paddr == OF_BAD_ADDR); @@ -1137,12 +1140,13 @@ struct mpic * __init mpic_alloc(struct device_node *node, * MPICs, num sources as well. On ISU MPICs, sources are counted * as ISUs are added */ - reg = mpic_read(mpic->gregs, MPIC_INFO(GREG_FEATURE_0)); - mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK) + greg_feature = mpic_read(mpic->gregs, MPIC_INFO(GREG_FEATURE_0)); + mpic->num_cpus = ((greg_feature & MPIC_GREG_FEATURE_LAST_CPU_MASK) >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; if (isu_size == 0) - mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK) - >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + mpic->num_sources = + ((greg_feature & MPIC_GREG_FEATURE_LAST_SRC_MASK) + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; /* Map the per-CPU registers */ for (i = 0; i < mpic->num_cpus; i++) { @@ -1161,7 +1165,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic->isu_mask = (1 << mpic->isu_shift) - 1; /* Display version */ - switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) { + switch (greg_feature & MPIC_GREG_FEATURE_VERSION_MASK) { case 1: vers = "1.0"; break; @@ -1321,7 +1325,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable) void mpic_irq_set_priority(unsigned int irq, unsigned int pri) { - int is_ipi; + unsigned int is_ipi; struct mpic *mpic = mpic_find(irq, &is_ipi); unsigned int src = mpic_irq_to_hw(irq); unsigned long flags; @@ -1344,7 +1348,7 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) unsigned int mpic_irq_get_priority(unsigned int irq) { - int is_ipi; + unsigned int is_ipi; struct mpic *mpic = mpic_find(irq, &is_ipi); unsigned int src = mpic_irq_to_hw(irq); unsigned long flags; diff --git a/arch/ppc/8xx_io/commproc.c b/arch/ppc/8xx_io/commproc.c index 9d656de0f0f..752443df5ec 100644 --- a/arch/ppc/8xx_io/commproc.c +++ b/arch/ppc/8xx_io/commproc.c @@ -43,7 +43,7 @@ ({ \ u32 offset = offsetof(immap_t, member); \ void *addr = ioremap (IMAP_ADDR + offset, \ - sizeof( ((immap_t*)0)->member)); \ + FIELD_SIZEOF(immap_t, member)); \ addr; \ }) diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S index 1b0ec7202dd..e7e642b9513 100644 --- a/arch/ppc/kernel/head.S +++ b/arch/ppc/kernel/head.S @@ -701,23 +701,6 @@ load_up_altivec: b fast_exception_return /* - * AltiVec unavailable trap from kernel - print a message, but let - * the task use AltiVec in the kernel until it returns to user mode. - */ -KernelAltiVec: - lwz r3,_MSR(r1) - oris r3,r3,MSR_VEC@h - stw r3,_MSR(r1) /* enable use of AltiVec after return */ - lis r3,87f@h - ori r3,r3,87f@l - mr r4,r2 /* current */ - lwz r5,_NIP(r1) - bl printk - b ret_from_except -87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" - .align 4,0 - -/* * giveup_altivec(tsk) * Disable AltiVec for the task given as the argument, * and save the AltiVec registers in its thread_struct. diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 463d1be32c9..2667a9dee11 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -16,6 +16,7 @@ config SPARC64 bool default y select HAVE_IDE + select HAVE_LMB help SPARC is a family of RISC microprocessors designed and marketed by Sun Microsystems, incorporated. This port covers the newer 64-bit diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 3b1ea321dc0..4b442739e7b 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -218,7 +218,8 @@ obj-$(CONFIG_SMC911X) += smc911x.o obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_FEC_8XX) += fec_8xx/ -obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o +obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o +pasemi_mac_driver-objs := pasemi_mac.o pasemi_mac_ethtool.o obj-$(CONFIG_MLX4_CORE) += mlx4/ obj-$(CONFIG_ENC28J60) += enc28j60.o diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 2e39e0285d8..c50f0f4de6d 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -55,15 +55,10 @@ * - Multiqueue RX/TX */ - -/* Must be a power of two */ -#define RX_RING_SIZE 2048 -#define TX_RING_SIZE 4096 - #define LRO_MAX_AGGR 64 #define PE_MIN_MTU 64 -#define PE_MAX_MTU 1500 +#define PE_MAX_MTU 9000 #define PE_DEF_MTU ETH_DATA_LEN #define DEFAULT_MSG_ENABLE \ @@ -76,16 +71,6 @@ NETIF_MSG_RX_ERR | \ NETIF_MSG_TX_ERR) -#define TX_DESC(tx, num) ((tx)->chan.ring_virt[(num) & (TX_RING_SIZE-1)]) -#define TX_DESC_INFO(tx, num) ((tx)->ring_info[(num) & (TX_RING_SIZE-1)]) -#define RX_DESC(rx, num) ((rx)->chan.ring_virt[(num) & (RX_RING_SIZE-1)]) -#define RX_DESC_INFO(rx, num) ((rx)->ring_info[(num) & (RX_RING_SIZE-1)]) -#define RX_BUFF(rx, num) ((rx)->buffers[(num) & (RX_RING_SIZE-1)]) - -#define RING_USED(ring) (((ring)->next_to_fill - (ring)->next_to_clean) \ - & ((ring)->size - 1)) -#define RING_AVAIL(ring) ((ring->size) - RING_USED(ring)) - MODULE_LICENSE("GPL"); MODULE_AUTHOR ("Olof Johansson <olof@lixom.net>"); MODULE_DESCRIPTION("PA Semi PWRficient Ethernet driver"); @@ -94,6 +79,8 @@ static int debug = -1; /* -1 == use DEFAULT_MSG_ENABLE as value */ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "PA Semi MAC bitmapped debugging message enable value"); +extern const struct ethtool_ops pasemi_mac_ethtool_ops; + static int translation_enabled(void) { #if defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE) @@ -322,6 +309,103 @@ static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, return (nfrags + 3) & ~1; } +static struct pasemi_mac_csring *pasemi_mac_setup_csring(struct pasemi_mac *mac) +{ + struct pasemi_mac_csring *ring; + u32 val; + unsigned int cfg; + int chno; + + ring = pasemi_dma_alloc_chan(TXCHAN, sizeof(struct pasemi_mac_csring), + offsetof(struct pasemi_mac_csring, chan)); + + if (!ring) { + dev_err(&mac->pdev->dev, "Can't allocate checksum channel\n"); + goto out_chan; + } + + chno = ring->chan.chno; + + ring->size = CS_RING_SIZE; + ring->next_to_fill = 0; + + /* Allocate descriptors */ + if (pasemi_dma_alloc_ring(&ring->chan, CS_RING_SIZE)) + goto out_ring_desc; + + write_dma_reg(PAS_DMA_TXCHAN_BASEL(chno), + PAS_DMA_TXCHAN_BASEL_BRBL(ring->chan.ring_dma)); + val = PAS_DMA_TXCHAN_BASEU_BRBH(ring->chan.ring_dma >> 32); + val |= PAS_DMA_TXCHAN_BASEU_SIZ(CS_RING_SIZE >> 3); + + write_dma_reg(PAS_DMA_TXCHAN_BASEU(chno), val); + + ring->events[0] = pasemi_dma_alloc_flag(); + ring->events[1] = pasemi_dma_alloc_flag(); + if (ring->events[0] < 0 || ring->events[1] < 0) + goto out_flags; + + pasemi_dma_clear_flag(ring->events[0]); + pasemi_dma_clear_flag(ring->events[1]); + + ring->fun = pasemi_dma_alloc_fun(); + if (ring->fun < 0) + goto out_fun; + + cfg = PAS_DMA_TXCHAN_CFG_TY_FUNC | PAS_DMA_TXCHAN_CFG_UP | + PAS_DMA_TXCHAN_CFG_TATTR(ring->fun) | + PAS_DMA_TXCHAN_CFG_LPSQ | PAS_DMA_TXCHAN_CFG_LPDQ; + + if (translation_enabled()) + cfg |= PAS_DMA_TXCHAN_CFG_TRD | PAS_DMA_TXCHAN_CFG_TRR; + + write_dma_reg(PAS_DMA_TXCHAN_CFG(chno), cfg); + + /* enable channel */ + pasemi_dma_start_chan(&ring->chan, PAS_DMA_TXCHAN_TCMDSTA_SZ | + PAS_DMA_TXCHAN_TCMDSTA_DB | + PAS_DMA_TXCHAN_TCMDSTA_DE | + PAS_DMA_TXCHAN_TCMDSTA_DA); + + return ring; + +out_fun: +out_flags: + if (ring->events[0] >= 0) + pasemi_dma_free_flag(ring->events[0]); + if (ring->events[1] >= 0) + pasemi_dma_free_flag(ring->events[1]); + pasemi_dma_free_ring(&ring->chan); +out_ring_desc: + pasemi_dma_free_chan(&ring->chan); +out_chan: + + return NULL; +} + +static void pasemi_mac_setup_csrings(struct pasemi_mac *mac) +{ + int i; + mac->cs[0] = pasemi_mac_setup_csring(mac); + if (mac->type == MAC_TYPE_XAUI) + mac->cs[1] = pasemi_mac_setup_csring(mac); + else + mac->cs[1] = 0; + + for (i = 0; i < MAX_CS; i++) + if (mac->cs[i]) + mac->num_cs++; +} + +static void pasemi_mac_free_csring(struct pasemi_mac_csring *csring) +{ + pasemi_dma_stop_chan(&csring->chan); + pasemi_dma_free_flag(csring->events[0]); + pasemi_dma_free_flag(csring->events[1]); + pasemi_dma_free_ring(&csring->chan); + pasemi_dma_free_chan(&csring->chan); +} + static int pasemi_mac_setup_rx_resources(const struct net_device *dev) { struct pasemi_mac_rxring *ring; @@ -445,7 +529,7 @@ pasemi_mac_setup_tx_resources(const struct net_device *dev) cfg = PAS_DMA_TXCHAN_CFG_TY_IFACE | PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) | PAS_DMA_TXCHAN_CFG_UP | - PAS_DMA_TXCHAN_CFG_WT(2); + PAS_DMA_TXCHAN_CFG_WT(4); if (translation_enabled()) cfg |= PAS_DMA_TXCHAN_CFG_TRD | PAS_DMA_TXCHAN_CFG_TRR; @@ -810,13 +894,21 @@ restart: u64 mactx = TX_DESC(txring, i); struct sk_buff *skb; - skb = TX_DESC_INFO(txring, i+1).skb; - nr_frags = TX_DESC_INFO(txring, i).dma; - if ((mactx & XCT_MACTX_E) || (*chan->status & PAS_STATUS_ERROR)) pasemi_mac_tx_error(mac, mactx); + /* Skip over control descriptors */ + if (!(mactx & XCT_MACTX_LLEN_M)) { + TX_DESC(txring, i) = 0; + TX_DESC(txring, i+1) = 0; + buf_count = 2; + continue; + } + + skb = TX_DESC_INFO(txring, i+1).skb; + nr_frags = TX_DESC_INFO(txring, i).dma; + if (unlikely(mactx & XCT_MACTX_O)) /* Not yet transmitted */ break; @@ -1041,13 +1133,7 @@ static int pasemi_mac_open(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); unsigned int flags; - int ret; - - /* enable rx section */ - write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN); - - /* enable tx section */ - write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN); + int i, ret; flags = PAS_MAC_CFG_TXP_FCE | PAS_MAC_CFG_TXP_FPC(3) | PAS_MAC_CFG_TXP_SL(3) | PAS_MAC_CFG_TXP_COB(0xf) | @@ -1064,6 +1150,16 @@ static int pasemi_mac_open(struct net_device *dev) if (!mac->tx) goto out_tx_ring; + if (dev->mtu > 1500) { + pasemi_mac_setup_csrings(mac); + if (!mac->num_cs) + goto out_tx_ring; + } + + /* Zero out rmon counters */ + for (i = 0; i < 32; i++) + write_mac_reg(mac, PAS_MAC_RMON(i), 0); + /* 0x3ff with 33MHz clock is about 31us */ write_iob_reg(PAS_IOB_DMA_COM_TIMEOUTCFG, PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0x3ff)); @@ -1247,7 +1343,7 @@ static int pasemi_mac_close(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); unsigned int sta; - int rxch, txch; + int rxch, txch, i; rxch = rx_ring(mac)->chan.chno; txch = tx_ring(mac)->chan.chno; @@ -1292,6 +1388,9 @@ static int pasemi_mac_close(struct net_device *dev) free_irq(mac->tx->chan.irq, mac->tx); free_irq(mac->rx->chan.irq, mac->rx); + for (i = 0; i < mac->num_cs; i++) + pasemi_mac_free_csring(mac->cs[i]); + /* Free resources */ pasemi_mac_free_rx_resources(mac); pasemi_mac_free_tx_resources(mac); @@ -1299,35 +1398,113 @@ static int pasemi_mac_close(struct net_device *dev) return 0; } +static void pasemi_mac_queue_csdesc(const struct sk_buff *skb, + const dma_addr_t *map, + const unsigned int *map_size, + struct pasemi_mac_txring *txring, + struct pasemi_mac_csring *csring) +{ + u64 fund; + dma_addr_t cs_dest; + const int nh_off = skb_network_offset(skb); + const int nh_len = skb_network_header_len(skb); + const int nfrags = skb_shinfo(skb)->nr_frags; + int cs_size, i, fill, hdr, cpyhdr, evt; + dma_addr_t csdma; + + fund = XCT_FUN_ST | XCT_FUN_RR_8BRES | + XCT_FUN_O | XCT_FUN_FUN(csring->fun) | + XCT_FUN_CRM_SIG | XCT_FUN_LLEN(skb->len - nh_off) | + XCT_FUN_SHL(nh_len >> 2) | XCT_FUN_SE; + + switch (ip_hdr(skb)->protocol) { + case IPPROTO_TCP: + fund |= XCT_FUN_SIG_TCP4; + /* TCP checksum is 16 bytes into the header */ + cs_dest = map[0] + skb_transport_offset(skb) + 16; + break; + case IPPROTO_UDP: + fund |= XCT_FUN_SIG_UDP4; + /* UDP checksum is 6 bytes into the header */ + cs_dest = map[0] + skb_transport_offset(skb) + 6; + break; + default: + BUG(); + } + + /* Do the checksum offloaded */ + fill = csring->next_to_fill; + hdr = fill; + + CS_DESC(csring, fill++) = fund; + /* Room for 8BRES. Checksum result is really 2 bytes into it */ + csdma = csring->chan.ring_dma + (fill & (CS_RING_SIZE-1)) * 8 + 2; + CS_DESC(csring, fill++) = 0; + + CS_DESC(csring, fill) = XCT_PTR_LEN(map_size[0]-nh_off) | XCT_PTR_ADDR(map[0]+nh_off); + for (i = 1; i <= nfrags; i++) + CS_DESC(csring, fill+i) = XCT_PTR_LEN(map_size[i]) | XCT_PTR_ADDR(map[i]); + + fill += i; + if (fill & 1) + fill++; + + /* Copy the result into the TCP packet */ + cpyhdr = fill; + CS_DESC(csring, fill++) = XCT_FUN_O | XCT_FUN_FUN(csring->fun) | + XCT_FUN_LLEN(2) | XCT_FUN_SE; + CS_DESC(csring, fill++) = XCT_PTR_LEN(2) | XCT_PTR_ADDR(cs_dest) | XCT_PTR_T; + CS_DESC(csring, fill++) = XCT_PTR_LEN(2) | XCT_PTR_ADDR(csdma); + fill++; + + evt = !csring->last_event; + csring->last_event = evt; + + /* Event handshaking with MAC TX */ + CS_DESC(csring, fill++) = CTRL_CMD_T | CTRL_CMD_META_EVT | CTRL_CMD_O | + CTRL_CMD_ETYPE_SET | CTRL_CMD_REG(csring->events[evt]); + CS_DESC(csring, fill++) = 0; + CS_DESC(csring, fill++) = CTRL_CMD_T | CTRL_CMD_META_EVT | CTRL_CMD_O | + CTRL_CMD_ETYPE_WCLR | CTRL_CMD_REG(csring->events[!evt]); + CS_DESC(csring, fill++) = 0; + csring->next_to_fill = fill & (CS_RING_SIZE-1); + + cs_size = fill - hdr; + write_dma_reg(PAS_DMA_TXCHAN_INCR(csring->chan.chno), (cs_size) >> 1); + + /* TX-side event handshaking */ + fill = txring->next_to_fill; + TX_DESC(txring, fill++) = CTRL_CMD_T | CTRL_CMD_META_EVT | CTRL_CMD_O | + CTRL_CMD_ETYPE_WSET | CTRL_CMD_REG(csring->events[evt]); + TX_DESC(txring, fill++) = 0; + TX_DESC(txring, fill++) = CTRL_CMD_T | CTRL_CMD_META_EVT | CTRL_CMD_O | + CTRL_CMD_ETYPE_CLR | CTRL_CMD_REG(csring->events[!evt]); + TX_DESC(txring, fill++) = 0; + txring->next_to_fill = fill; + + write_dma_reg(PAS_DMA_TXCHAN_INCR(txring->chan.chno), 2); + + return; +} + static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) { - struct pasemi_mac *mac = netdev_priv(dev); - struct pasemi_mac_txring *txring; - u64 dflags, mactx; + struct pasemi_mac * const mac = netdev_priv(dev); + struct pasemi_mac_txring * const txring = tx_ring(mac); + struct pasemi_mac_csring *csring; + u64 dflags = 0; + u64 mactx; dma_addr_t map[MAX_SKB_FRAGS+1]; unsigned int map_size[MAX_SKB_FRAGS+1]; unsigned long flags; int i, nfrags; int fill; + const int nh_off = skb_network_offset(skb); + const int nh_len = skb_network_header_len(skb); - dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD; - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - const unsigned char *nh = skb_network_header(skb); + prefetch(&txring->ring_info); - switch (ip_hdr(skb)->protocol) { - case IPPROTO_TCP: - dflags |= XCT_MACTX_CSUM_TCP; - dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2); - dflags |= XCT_MACTX_IPO(nh - skb->data); - break; - case IPPROTO_UDP: - dflags |= XCT_MACTX_CSUM_UDP; - dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2); - dflags |= XCT_MACTX_IPO(nh - skb->data); - break; - } - } + dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD; nfrags = skb_shinfo(skb)->nr_frags; @@ -1350,24 +1527,46 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) } } - mactx = dflags | XCT_MACTX_LLEN(skb->len); + if (skb->ip_summed == CHECKSUM_PARTIAL && skb->len <= 1540) { + switch (ip_hdr(skb)->protocol) { + case IPPROTO_TCP: + dflags |= XCT_MACTX_CSUM_TCP; + dflags |= XCT_MACTX_IPH(nh_len >> 2); + dflags |= XCT_MACTX_IPO(nh_off); + break; + case IPPROTO_UDP: + dflags |= XCT_MACTX_CSUM_UDP; + dflags |= XCT_MACTX_IPH(nh_len >> 2); + dflags |= XCT_MACTX_IPO(nh_off); + break; + default: + WARN_ON(1); + } + } - txring = tx_ring(mac); + mactx = dflags | XCT_MACTX_LLEN(skb->len); spin_lock_irqsave(&txring->lock, flags); - fill = txring->next_to_fill; - /* Avoid stepping on the same cache line that the DMA controller * is currently about to send, so leave at least 8 words available. * Total free space needed is mactx + fragments + 8 */ - if (RING_AVAIL(txring) < nfrags + 10) { + if (RING_AVAIL(txring) < nfrags + 14) { /* no room -- stop the queue and wait for tx intr */ netif_stop_queue(dev); goto out_err; } + /* Queue up checksum + event descriptors, if needed */ + if (mac->num_cs && skb->ip_summed == CHECKSUM_PARTIAL && skb->len > 1540) { + csring = mac->cs[mac->last_cs]; + mac->last_cs = (mac->last_cs + 1) % mac->num_cs; + + pasemi_mac_queue_csdesc(skb, map, map_size, txring, csring); + } + + fill = txring->next_to_fill; TX_DESC(txring, fill) = mactx; TX_DESC_INFO(txring, fill).dma = nfrags; fill++; @@ -1445,8 +1644,9 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) { struct pasemi_mac *mac = netdev_priv(dev); unsigned int reg; - unsigned int rcmdsta; + unsigned int rcmdsta = 0; int running; + int ret = 0; if (new_mtu < PE_MIN_MTU || new_mtu > PE_MAX_MTU) return -EINVAL; @@ -1468,6 +1668,16 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) pasemi_mac_pause_rxint(mac); pasemi_mac_clean_rx(rx_ring(mac), RX_RING_SIZE); pasemi_mac_free_rx_buffers(mac); + + } + + /* Setup checksum channels if large MTU and none already allocated */ + if (new_mtu > 1500 && !mac->num_cs) { + pasemi_mac_setup_csrings(mac); + if (!mac->num_cs) { + ret = -ENOMEM; + goto out; + } } /* Change maxf, i.e. what size frames are accepted. @@ -1482,6 +1692,7 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) /* MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */ mac->bufsz = new_mtu + ETH_HLEN + ETH_FCS_LEN + LOCAL_SKB_ALIGN + 128; +out: if (running) { write_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if), rcmdsta | PAS_DMA_RXINT_RCMDSTA_EN); @@ -1494,7 +1705,7 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) pasemi_mac_intf_enable(mac); } - return 0; + return ret; } static int __devinit @@ -1528,7 +1739,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64); dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HIGHDMA; + NETIF_F_HIGHDMA | NETIF_F_GSO; mac->lro_mgr.max_aggr = LRO_MAX_AGGR; mac->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS; @@ -1590,6 +1801,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mac->bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + LOCAL_SKB_ALIGN + 128; dev->change_mtu = pasemi_mac_change_mtu; + dev->ethtool_ops = &pasemi_mac_ethtool_ops; if (err) goto out; diff --git a/drivers/net/pasemi_mac.h b/drivers/net/pasemi_mac.h index 99e7b9329a6..1a115ec60b5 100644 --- a/drivers/net/pasemi_mac.h +++ b/drivers/net/pasemi_mac.h @@ -26,7 +26,14 @@ #include <linux/spinlock.h> #include <linux/phy.h> +/* Must be a power of two */ +#define RX_RING_SIZE 2048 +#define TX_RING_SIZE 4096 +#define CS_RING_SIZE (TX_RING_SIZE*2) + + #define MAX_LRO_DESCRIPTORS 8 +#define MAX_CS 2 struct pasemi_mac_txring { struct pasemi_dmachan chan; /* Must be first */ @@ -51,6 +58,15 @@ struct pasemi_mac_rxring { struct pasemi_mac *mac; /* Needed in intr handler */ }; +struct pasemi_mac_csring { + struct pasemi_dmachan chan; + unsigned int size; + unsigned int next_to_fill; + int events[2]; + int last_event; + int fun; +}; + struct pasemi_mac { struct net_device *netdev; struct pci_dev *pdev; @@ -60,10 +76,12 @@ struct pasemi_mac { struct napi_struct napi; int bufsz; /* RX ring buffer size */ + int last_cs; + int num_cs; + u32 dma_if; u8 type; #define MAC_TYPE_GMAC 1 #define MAC_TYPE_XAUI 2 - u32 dma_if; u8 mac_addr[6]; @@ -74,6 +92,7 @@ struct pasemi_mac { struct pasemi_mac_txring *tx; struct pasemi_mac_rxring *rx; + struct pasemi_mac_csring *cs[MAX_CS]; char tx_irq_name[10]; /* "eth%d tx" */ char rx_irq_name[10]; /* "eth%d rx" */ int link; @@ -90,6 +109,16 @@ struct pasemi_mac_buffer { dma_addr_t dma; }; +#define TX_DESC(tx, num) ((tx)->chan.ring_virt[(num) & (TX_RING_SIZE-1)]) +#define TX_DESC_INFO(tx, num) ((tx)->ring_info[(num) & (TX_RING_SIZE-1)]) +#define RX_DESC(rx, num) ((rx)->chan.ring_virt[(num) & (RX_RING_SIZE-1)]) +#define RX_DESC_INFO(rx, num) ((rx)->ring_info[(num) & (RX_RING_SIZE-1)]) +#define RX_BUFF(rx, num) ((rx)->buffers[(num) & (RX_RING_SIZE-1)]) +#define CS_DESC(cs, num) ((cs)->chan.ring_virt[(num) & (CS_RING_SIZE-1)]) + +#define RING_USED(ring) (((ring)->next_to_fill - (ring)->next_to_clean) \ + & ((ring)->size - 1)) +#define RING_AVAIL(ring) ((ring->size) - RING_USED(ring)) /* PCI register offsets and formats */ @@ -101,6 +130,7 @@ enum { PAS_MAC_CFG_ADR0 = 0x8c, PAS_MAC_CFG_ADR1 = 0x90, PAS_MAC_CFG_TXP = 0x98, + PAS_MAC_CFG_RMON = 0x100, PAS_MAC_IPC_CHNL = 0x208, }; @@ -172,6 +202,8 @@ enum { #define PAS_MAC_CFG_TXP_TIFG(x) (((x) << PAS_MAC_CFG_TXP_TIFG_S) & \ PAS_MAC_CFG_TXP_TIFG_M) +#define PAS_MAC_RMON(r) (0x100+(r)*4) + #define PAS_MAC_IPC_CHNL_DCHNO_M 0x003f0000 #define PAS_MAC_IPC_CHNL_DCHNO_S 16 #define PAS_MAC_IPC_CHNL_DCHNO(x) (((x) << PAS_MAC_IPC_CHNL_DCHNO_S) & \ @@ -181,4 +213,5 @@ enum { #define PAS_MAC_IPC_CHNL_BCH(x) (((x) << PAS_MAC_IPC_CHNL_BCH_S) & \ PAS_MAC_IPC_CHNL_BCH_M) + #endif /* PASEMI_MAC_H */ diff --git a/drivers/net/pasemi_mac_ethtool.c b/drivers/net/pasemi_mac_ethtool.c new file mode 100644 index 00000000000..5e8df3afea6 --- /dev/null +++ b/drivers/net/pasemi_mac_ethtool.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2006-2008 PA Semi, Inc + * + * Ethtool hooks for the PA Semi PWRficient onchip 1G/10G Ethernet MACs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/pci.h> +#include <linux/inet_lro.h> + +#include <asm/pasemi_dma.h> +#include "pasemi_mac.h" + +static struct { + const char str[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "rx-drops" }, + { "rx-bytes" }, + { "rx-packets" }, + { "rx-broadcast-packets" }, + { "rx-multicast-packets" }, + { "rx-crc-errors" }, + { "rx-undersize-errors" }, + { "rx-oversize-errors" }, + { "rx-short-fragment-errors" }, + { "rx-jabber-errors" }, + { "rx-64-byte-packets" }, + { "rx-65-127-byte-packets" }, + { "rx-128-255-byte-packets" }, + { "rx-256-511-byte-packets" }, + { "rx-512-1023-byte-packets" }, + { "rx-1024-1518-byte-packets" }, + { "rx-pause-frames" }, + { "tx-bytes" }, + { "tx-packets" }, + { "tx-broadcast-packets" }, + { "tx-multicast-packets" }, + { "tx-collisions" }, + { "tx-late-collisions" }, + { "tx-excessive-collisions" }, + { "tx-crc-errors" }, + { "tx-undersize-errors" }, + { "tx-oversize-errors" }, + { "tx-64-byte-packets" }, + { "tx-65-127-byte-packets" }, + { "tx-128-255-byte-packets" }, + { "tx-256-511-byte-packets" }, + { "tx-512-1023-byte-packets" }, + { "tx-1024-1518-byte-packets" }, +}; + +static int +pasemi_mac_ethtool_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct pasemi_mac *mac = netdev_priv(netdev); + struct phy_device *phydev = mac->phydev; + + return phy_ethtool_gset(phydev, cmd); +} + +static void +pasemi_mac_ethtool_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct pasemi_mac *mac; + mac = netdev_priv(netdev); + + /* clear and fill out info */ + memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); + strncpy(drvinfo->driver, "pasemi_mac", 12); + strcpy(drvinfo->version, "N/A"); + strcpy(drvinfo->fw_version, "N/A"); + strncpy(drvinfo->bus_info, pci_name(mac->pdev), 32); +} + +static u32 +pasemi_mac_ethtool_get_msglevel(struct net_device *netdev) +{ + struct pasemi_mac *mac = netdev_priv(netdev); + return mac->msg_enable; +} + +static void +pasemi_mac_ethtool_set_msglevel(struct net_device *netdev, + u32 level) +{ + struct pasemi_mac *mac = netdev_priv(netdev); + mac->msg_enable = level; +} + + +static void +pasemi_mac_ethtool_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ering) +{ + struct pasemi_mac *mac = netdev->priv; + + ering->tx_max_pending = TX_RING_SIZE/2; + ering->tx_pending = RING_USED(mac->tx)/2; + ering->rx_max_pending = RX_RING_SIZE/4; + ering->rx_pending = RING_USED(mac->rx)/4; +} + +static int pasemi_mac_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ethtool_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void pasemi_mac_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct pasemi_mac *mac = netdev->priv; + int i; + + data[0] = pasemi_read_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if)) + >> PAS_DMA_RXINT_RCMDSTA_DROPS_S; + for (i = 0; i < 32; i++) + data[1+i] = pasemi_read_mac_reg(mac->dma_if, PAS_MAC_RMON(i)); +} + +static void pasemi_mac_get_strings(struct net_device *netdev, u32 stringset, + u8 *data) +{ + memcpy(data, ethtool_stats_keys, sizeof(ethtool_stats_keys)); +} + +const struct ethtool_ops pasemi_mac_ethtool_ops = { + .get_settings = pasemi_mac_ethtool_get_settings, + .get_drvinfo = pasemi_mac_ethtool_get_drvinfo, + .get_msglevel = pasemi_mac_ethtool_get_msglevel, + .set_msglevel = pasemi_mac_ethtool_set_msglevel, + .get_link = ethtool_op_get_link, + .get_ringparam = pasemi_mac_ethtool_get_ringparam, + .get_strings = pasemi_mac_get_strings, + .get_sset_count = pasemi_mac_get_sset_count, + .get_ethtool_stats = pasemi_mac_get_ethtool_stats, +}; + diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild index 5f640e54247..7381916dfcb 100644 --- a/include/asm-powerpc/Kbuild +++ b/include/asm-powerpc/Kbuild @@ -1,5 +1,6 @@ include include/asm-generic/Kbuild.asm +header-y += a.out.h header-y += auxvec.h header-y += ioctls.h header-y += mman.h @@ -23,7 +24,6 @@ header-y += sigcontext.h header-y += statfs.h header-y += ps3fb.h -unifdef-y += a.out.h unifdef-y += asm-compat.h unifdef-y += bootx.h unifdef-y += byteorder.h diff --git a/include/asm-powerpc/abs_addr.h b/include/asm-powerpc/abs_addr.h index 4aa220718b1..98324c5a828 100644 --- a/include/asm-powerpc/abs_addr.h +++ b/include/asm-powerpc/abs_addr.h @@ -12,10 +12,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/lmb.h> + #include <asm/types.h> #include <asm/page.h> #include <asm/prom.h> -#include <asm/lmb.h> #include <asm/firmware.h> struct mschunks_map { diff --git a/include/asm-powerpc/cputhreads.h b/include/asm-powerpc/cputhreads.h index 8485c28b5f4..fb11b0c459b 100644 --- a/include/asm-powerpc/cputhreads.h +++ b/include/asm-powerpc/cputhreads.h @@ -35,7 +35,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads) res = CPU_MASK_NONE; for (i = 0; i < NR_CPUS; i += threads_per_core) { - cpus_shift_right(tmp, threads_core_mask, i); + cpus_shift_left(tmp, threads_core_mask, i); if (cpus_intersects(threads, tmp)) cpu_set(i, res); } diff --git a/include/asm-powerpc/lmb.h b/include/asm-powerpc/lmb.h index 5d1dc48a0bb..028184b6a16 100644 --- a/include/asm-powerpc/lmb.h +++ b/include/asm-powerpc/lmb.h @@ -1,81 +1,15 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H -#ifdef __KERNEL__ -/* - * Definitions for talking to the Open Firmware PROM on - * Power Macintosh computers. - * - * Copyright (C) 2001 Peter Bergner, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ +#include <asm/udbg.h> -#include <linux/init.h> -#include <asm/prom.h> +#define LMB_DBG(fmt...) udbg_printf(fmt) -#define MAX_LMB_REGIONS 128 +#ifdef CONFIG_PPC32 +extern unsigned long __max_low_memory; +#define LMB_REAL_LIMIT __max_low_memory +#else +#define LMB_REAL_LIMIT 0 +#endif -struct lmb_property { - unsigned long base; - unsigned long size; -}; - -struct lmb_region { - unsigned long cnt; - unsigned long size; - struct lmb_property region[MAX_LMB_REGIONS+1]; -}; - -struct lmb { - unsigned long debug; - unsigned long rmo_size; - struct lmb_region memory; - struct lmb_region reserved; -}; - -extern struct lmb lmb; - -extern void __init lmb_init(void); -extern void __init lmb_analyze(void); -extern long __init lmb_add(unsigned long base, unsigned long size); -extern long __init lmb_reserve(unsigned long base, unsigned long size); -extern unsigned long __init lmb_alloc(unsigned long size, unsigned long align); -extern unsigned long __init lmb_alloc_base(unsigned long size, - unsigned long align, unsigned long max_addr); -extern unsigned long __init __lmb_alloc_base(unsigned long size, - unsigned long align, unsigned long max_addr); -extern unsigned long __init lmb_phys_mem_size(void); -extern unsigned long __init lmb_end_of_DRAM(void); -extern void __init lmb_enforce_memory_limit(unsigned long memory_limit); -extern int __init lmb_is_reserved(unsigned long addr); - -extern void lmb_dump_all(void); - -static inline unsigned long -lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].size; -} -static inline unsigned long -lmb_size_pages(struct lmb_region *type, unsigned long region_nr) -{ - return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline unsigned long -lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].base >> PAGE_SHIFT; -} -static inline unsigned long -lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) -{ - return lmb_start_pfn(type, region_nr) + - lmb_size_pages(type, region_nr); -} - -#endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LMB_H */ diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 0872ec228c1..b95386aed50 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -68,6 +68,8 @@ struct machdep_calls { unsigned long vflags, int psize, int ssize); long (*hpte_remove)(unsigned long hpte_group); + void (*hpte_removebolted)(unsigned long ea, + int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); /* special for kexec, to be called in real mode, linar mapping is diff --git a/include/asm-powerpc/pasemi_dma.h b/include/asm-powerpc/pasemi_dma.h index b4526ff3a50..19fd7933e2d 100644 --- a/include/asm-powerpc/pasemi_dma.h +++ b/include/asm-powerpc/pasemi_dma.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 PA Semi, Inc + * Copyright (C) 2006-2008 PA Semi, Inc * * Hardware register layout and descriptor formats for the on-board * DMA engine on PA Semi PWRficient. Used by ethernet, function and security @@ -40,6 +40,11 @@ enum { PAS_DMA_COM_TXSTA = 0x104, /* Transmit Status Register */ PAS_DMA_COM_RXCMD = 0x108, /* Receive Command Register */ PAS_DMA_COM_RXSTA = 0x10c, /* Receive Status Register */ + PAS_DMA_COM_CFG = 0x114, /* Common config reg */ + PAS_DMA_TXF_SFLG0 = 0x140, /* Set flags */ + PAS_DMA_TXF_SFLG1 = 0x144, /* Set flags */ + PAS_DMA_TXF_CFLG0 = 0x148, /* Set flags */ + PAS_DMA_TXF_CFLG1 = 0x14c, /* Set flags */ }; @@ -123,11 +128,16 @@ enum { #define PAS_DMA_TXCHAN_TCMDSTA_DA 0x00000100 #define PAS_DMA_TXCHAN_CFG(c) (0x304+(c)*_PAS_DMA_TXCHAN_STRIDE) #define PAS_DMA_TXCHAN_CFG_TY_IFACE 0x00000000 /* Type = interface */ +#define PAS_DMA_TXCHAN_CFG_TY_COPY 0x00000001 /* Type = copy only */ +#define PAS_DMA_TXCHAN_CFG_TY_FUNC 0x00000002 /* Type = function */ +#define PAS_DMA_TXCHAN_CFG_TY_XOR 0x00000003 /* Type = xor only */ #define PAS_DMA_TXCHAN_CFG_TATTR_M 0x0000003c #define PAS_DMA_TXCHAN_CFG_TATTR_S 2 #define PAS_DMA_TXCHAN_CFG_TATTR(x) (((x) << PAS_DMA_TXCHAN_CFG_TATTR_S) & \ PAS_DMA_TXCHAN_CFG_TATTR_M) -#define PAS_DMA_TXCHAN_CFG_WT_M 0x000001c0 +#define PAS_DMA_TXCHAN_CFG_LPDQ 0x00000800 +#define PAS_DMA_TXCHAN_CFG_LPSQ 0x00000400 +#define PAS_DMA_TXCHAN_CFG_WT_M 0x000003c0 #define PAS_DMA_TXCHAN_CFG_WT_S 6 #define PAS_DMA_TXCHAN_CFG_WT(x) (((x) << PAS_DMA_TXCHAN_CFG_WT_S) & \ PAS_DMA_TXCHAN_CFG_WT_M) @@ -394,11 +404,62 @@ enum { XCT_COPY_LLEN_M) #define XCT_COPY_SE 0x0000000000000001ull +/* Function descriptor fields */ +#define XCT_FUN_T 0x8000000000000000ull +#define XCT_FUN_ST 0x4000000000000000ull +#define XCT_FUN_RR_M 0x3000000000000000ull +#define XCT_FUN_RR_NORES 0x0000000000000000ull +#define XCT_FUN_RR_8BRES 0x1000000000000000ull +#define XCT_FUN_RR_24BRES 0x2000000000000000ull +#define XCT_FUN_RR_40BRES 0x3000000000000000ull +#define XCT_FUN_I 0x0800000000000000ull +#define XCT_FUN_O 0x0400000000000000ull +#define XCT_FUN_E 0x0200000000000000ull +#define XCT_FUN_FUN_M 0x01c0000000000000ull +#define XCT_FUN_FUN_S 54 +#define XCT_FUN_FUN(x) ((((long)(x)) << XCT_FUN_FUN_S) & XCT_FUN_FUN_M) +#define XCT_FUN_CRM_M 0x0038000000000000ull +#define XCT_FUN_CRM_NOP 0x0000000000000000ull +#define XCT_FUN_CRM_SIG 0x0008000000000000ull +#define XCT_FUN_LLEN_M 0x0007ffff00000000ull +#define XCT_FUN_LLEN_S 32 +#define XCT_FUN_LLEN(x) ((((long)(x)) << XCT_FUN_LLEN_S) & XCT_FUN_LLEN_M) +#define XCT_FUN_SHL_M 0x00000000f8000000ull +#define XCT_FUN_SHL_S 27 +#define XCT_FUN_SHL(x) ((((long)(x)) << XCT_FUN_SHL_S) & XCT_FUN_SHL_M) +#define XCT_FUN_CHL_M 0x0000000007c00000ull +#define XCT_FUN_HSZ_M 0x00000000003c0000ull +#define XCT_FUN_ALG_M 0x0000000000038000ull +#define XCT_FUN_HP 0x0000000000004000ull +#define XCT_FUN_BCM_M 0x0000000000003800ull +#define XCT_FUN_BCP_M 0x0000000000000600ull +#define XCT_FUN_SIG_M 0x00000000000001f0ull +#define XCT_FUN_SIG_TCP4 0x0000000000000140ull +#define XCT_FUN_SIG_TCP6 0x0000000000000150ull +#define XCT_FUN_SIG_UDP4 0x0000000000000160ull +#define XCT_FUN_SIG_UDP6 0x0000000000000170ull +#define XCT_FUN_A 0x0000000000000008ull +#define XCT_FUN_C 0x0000000000000004ull +#define XCT_FUN_AL2 0x0000000000000002ull +#define XCT_FUN_SE 0x0000000000000001ull + +/* Function descriptor 8byte result fields */ +#define XCT_FUNRES_8B_CS_M 0x0000ffff00000000ull +#define XCT_FUNRES_8B_CS_S 32 +#define XCT_FUNRES_8B_CRC_M 0x00000000ffffffffull +#define XCT_FUNRES_8B_CRC_S 0 + /* Control descriptor fields */ #define CTRL_CMD_T 0x8000000000000000ull #define CTRL_CMD_META_EVT 0x2000000000000000ull #define CTRL_CMD_O 0x0400000000000000ull -#define CTRL_CMD_REG_M 0x000000000000000full +#define CTRL_CMD_ETYPE_M 0x0038000000000000ull +#define CTRL_CMD_ETYPE_EXT 0x0000000000000000ull +#define CTRL_CMD_ETYPE_WSET 0x0020000000000000ull +#define CTRL_CMD_ETYPE_WCLR 0x0028000000000000ull +#define CTRL_CMD_ETYPE_SET 0x0030000000000000ull +#define CTRL_CMD_ETYPE_CLR 0x0038000000000000ull +#define CTRL_CMD_REG_M 0x000000000000007full #define CTRL_CMD_REG_S 0 #define CTRL_CMD_REG(x) ((((long)(x)) << CTRL_CMD_REG_S) & \ CTRL_CMD_REG_M) @@ -461,6 +522,16 @@ extern void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size, extern void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size, dma_addr_t *handle); +/* Routines to allocate flags (events) for channel syncronization */ +extern int pasemi_dma_alloc_flag(void); +extern void pasemi_dma_free_flag(int flag); +extern void pasemi_dma_set_flag(int flag); +extern void pasemi_dma_clear_flag(int flag); + +/* Routines to allocate function engines */ +extern int pasemi_dma_alloc_fun(void); +extern void pasemi_dma_free_fun(int fun); + /* Initialize the library, must be called before any other functions */ extern int pasemi_dma_init(void); diff --git a/include/asm-powerpc/phyp_dump.h b/include/asm-powerpc/phyp_dump.h new file mode 100644 index 00000000000..209a98913d9 --- /dev/null +++ b/include/asm-powerpc/phyp_dump.h @@ -0,0 +1,45 @@ +/* + * Hypervisor-assisted dump + * + * Linas Vepstas, Manish Ahuja 2008 + * Copyright 2008 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _PPC64_PHYP_DUMP_H +#define _PPC64_PHYP_DUMP_H + +#ifdef CONFIG_PHYP_DUMP + +/* The RMR region will be saved for later dumping + * whenever the kernel crashes. Set this to 256MB. */ +#define PHYP_DUMP_RMR_START 0x0 +#define PHYP_DUMP_RMR_END (1UL<<28) + +struct phyp_dump { + /* Memory that is reserved during very early boot. */ + unsigned long init_reserve_start; + unsigned long init_reserve_size; + /* Check status during boot if dump supported, active & present*/ + unsigned long phyp_dump_at_boot; + unsigned long phyp_dump_configured; + unsigned long phyp_dump_is_active; + /* store cpu & hpte size */ + unsigned long cpu_state_size; + unsigned long hpte_region_size; + /* previous scratch area values */ + unsigned long reserved_scratch_addr; + unsigned long reserved_scratch_size; +}; + +extern struct phyp_dump *phyp_dump_info; + +int early_init_dt_scan_phyp_dump(unsigned long node, + const char *uname, int depth, void *data); + +#endif /* CONFIG_PHYP_DUMP */ +#endif /* _PPC64_PHYP_DUMP_H */ diff --git a/include/asm-powerpc/sparsemem.h b/include/asm-powerpc/sparsemem.h index e8b493d52b4..c5acf4ccf57 100644 --- a/include/asm-powerpc/sparsemem.h +++ b/include/asm-powerpc/sparsemem.h @@ -15,6 +15,7 @@ #ifdef CONFIG_MEMORY_HOTPLUG extern void create_section_mapping(unsigned long start, unsigned long end); +extern void remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); #else diff --git a/include/asm-sparc64/lmb.h b/include/asm-sparc64/lmb.h new file mode 100644 index 00000000000..6a352cbcf52 --- /dev/null +++ b/include/asm-sparc64/lmb.h @@ -0,0 +1,10 @@ +#ifndef _SPARC64_LMB_H +#define _SPARC64_LMB_H + +#include <asm/oplib.h> + +#define LMB_DBG(fmt...) prom_printf(fmt) + +#define LMB_REAL_LIMIT 0 + +#endif /* !(_SPARC64_LMB_H) */ diff --git a/include/linux/lmb.h b/include/linux/lmb.h new file mode 100644 index 00000000000..632717c6a2b --- /dev/null +++ b/include/linux/lmb.h @@ -0,0 +1,83 @@ +#ifndef _LINUX_LMB_H +#define _LINUX_LMB_H +#ifdef __KERNEL__ + +/* + * Logical memory blocks. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mm.h> + +#define MAX_LMB_REGIONS 128 + +struct lmb_property { + u64 base; + u64 size; +}; + +struct lmb_region { + unsigned long cnt; + u64 size; + struct lmb_property region[MAX_LMB_REGIONS+1]; +}; + +struct lmb { + unsigned long debug; + u64 rmo_size; + struct lmb_region memory; + struct lmb_region reserved; +}; + +extern struct lmb lmb; + +extern void __init lmb_init(void); +extern void __init lmb_analyze(void); +extern long __init lmb_add(u64 base, u64 size); +extern long __init lmb_reserve(u64 base, u64 size); +extern u64 __init lmb_alloc(u64 size, u64 align); +extern u64 __init lmb_alloc_base(u64 size, + u64, u64 max_addr); +extern u64 __init __lmb_alloc_base(u64 size, + u64 align, u64 max_addr); +extern u64 __init lmb_phys_mem_size(void); +extern u64 __init lmb_end_of_DRAM(void); +extern void __init lmb_enforce_memory_limit(u64 memory_limit); +extern int __init lmb_is_reserved(u64 addr); + +extern void lmb_dump_all(void); + +static inline u64 +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} +static inline u64 +lmb_size_pages(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; +} +static inline u64 +lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} +static inline u64 +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_start_pfn(type, region_nr) + + lmb_size_pages(type, region_nr); +} + +#include <asm/lmb.h> + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_LMB_H */ diff --git a/lib/Kconfig b/lib/Kconfig index ba3d104994d..2d53dc092e8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -141,4 +141,7 @@ config HAS_DMA config CHECK_SIGNATURE bool +config HAVE_LMB + boolean + endmenu diff --git a/lib/Makefile b/lib/Makefile index 23de261a4c8..61bba16a0a2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,8 @@ obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o +obj-$(CONFIG_HAVE_LMB) += lmb.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/arch/powerpc/mm/lmb.c b/lib/lmb.c index 4ce23bcf8a5..3c43b95fef4 100644 --- a/arch/powerpc/mm/lmb.c +++ b/lib/lmb.c @@ -3,7 +3,7 @@ * * Peter Bergner, IBM Corp. June 2001. * Copyright (C) 2001 Peter Bergner. - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -13,19 +13,12 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/bitops.h> -#include <asm/types.h> -#include <asm/page.h> -#include <asm/prom.h> -#include <asm/lmb.h> -#ifdef CONFIG_PPC32 -#include "mmu_decl.h" /* for __max_low_memory */ -#endif +#include <linux/lmb.h> #undef DEBUG #ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) +#define DBG(fmt...) LMB_DBG(fmt) #else #define DBG(fmt...) #endif @@ -41,33 +34,34 @@ void lmb_dump_all(void) DBG("lmb_dump_all:\n"); DBG(" memory.cnt = 0x%lx\n", lmb.memory.cnt); - DBG(" memory.size = 0x%lx\n", lmb.memory.size); + DBG(" memory.size = 0x%llx\n", + (unsigned long long)lmb.memory.size); for (i=0; i < lmb.memory.cnt ;i++) { - DBG(" memory.region[0x%x].base = 0x%lx\n", - i, lmb.memory.region[i].base); - DBG(" .size = 0x%lx\n", - lmb.memory.region[i].size); + DBG(" memory.region[0x%x].base = 0x%llx\n", + i, (unsigned long long)lmb.memory.region[i].base); + DBG(" .size = 0x%llx\n", + (unsigned long long)lmb.memory.region[i].size); } DBG("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt); DBG(" reserved.size = 0x%lx\n", lmb.reserved.size); for (i=0; i < lmb.reserved.cnt ;i++) { - DBG(" reserved.region[0x%x].base = 0x%lx\n", - i, lmb.reserved.region[i].base); - DBG(" .size = 0x%lx\n", - lmb.reserved.region[i].size); + DBG(" reserved.region[0x%x].base = 0x%llx\n", + i, (unsigned long long)lmb.reserved.region[i].base); + DBG(" .size = 0x%llx\n", + (unsigned long long)lmb.reserved.region[i].size); } #endif /* DEBUG */ } -static unsigned long __init lmb_addrs_overlap(unsigned long base1, - unsigned long size1, unsigned long base2, unsigned long size2) +static unsigned long __init lmb_addrs_overlap(u64 base1, + u64 size1, u64 base2, u64 size2) { return ((base1 < (base2+size2)) && (base2 < (base1+size1))); } -static long __init lmb_addrs_adjacent(unsigned long base1, unsigned long size1, - unsigned long base2, unsigned long size2) +static long __init lmb_addrs_adjacent(u64 base1, u64 size1, + u64 base2, u64 size2) { if (base2 == base1 + size1) return 1; @@ -80,10 +74,10 @@ static long __init lmb_addrs_adjacent(unsigned long base1, unsigned long size1, static long __init lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) { - unsigned long base1 = rgn->region[r1].base; - unsigned long size1 = rgn->region[r1].size; - unsigned long base2 = rgn->region[r2].base; - unsigned long size2 = rgn->region[r2].size; + u64 base1 = rgn->region[r1].base; + u64 size1 = rgn->region[r1].size; + u64 base2 = rgn->region[r2].base; + u64 size2 = rgn->region[r2].size; return lmb_addrs_adjacent(base1, size1, base2, size2); } @@ -135,16 +129,21 @@ void __init lmb_analyze(void) } /* This routine called with relocation disabled. */ -static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base, - unsigned long size) +static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; + if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { + rgn->region[0].base = base; + rgn->region[0].size = size; + return 0; + } + /* First try and coalesce this LMB with another. */ for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; + u64 rgnbase = rgn->region[i].base; + u64 rgnsize = rgn->region[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -185,13 +184,18 @@ static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base, break; } } + + if (base < rgn->region[0].base) { + rgn->region[0].base = base; + rgn->region[0].size = size; + } rgn->cnt++; return 0; } /* This routine may be called with relocation disabled. */ -long __init lmb_add(unsigned long base, unsigned long size) +long __init lmb_add(u64 base, u64 size) { struct lmb_region *_rgn = &(lmb.memory); @@ -203,7 +207,7 @@ long __init lmb_add(unsigned long base, unsigned long size) } -long __init lmb_reserve(unsigned long base, unsigned long size) +long __init lmb_reserve(u64 base, u64 size) { struct lmb_region *_rgn = &(lmb.reserved); @@ -212,14 +216,14 @@ long __init lmb_reserve(unsigned long base, unsigned long size) return lmb_add_region(_rgn, base, size); } -long __init lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, - unsigned long size) +long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, + u64 size) { unsigned long i; for (i=0; i < rgn->cnt; i++) { - unsigned long rgnbase = rgn->region[i].base; - unsigned long rgnsize = rgn->region[i].size; + u64 rgnbase = rgn->region[i].base; + u64 rgnsize = rgn->region[i].size; if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { break; } @@ -228,54 +232,61 @@ long __init lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, return (i < rgn->cnt) ? i : -1; } -unsigned long __init lmb_alloc(unsigned long size, unsigned long align) +u64 __init lmb_alloc(u64 size, u64 align) { return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); } -unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, - unsigned long max_addr) +u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr) { - unsigned long alloc; + u64 alloc; alloc = __lmb_alloc_base(size, align, max_addr); if (alloc == 0) - panic("ERROR: Failed to allocate 0x%lx bytes below 0x%lx.\n", - size, max_addr); + panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", + (unsigned long long) size, (unsigned long long) max_addr); return alloc; } -unsigned long __init __lmb_alloc_base(unsigned long size, unsigned long align, - unsigned long max_addr) +static u64 lmb_align_down(u64 addr, u64 size) +{ + return addr & ~(size - 1); +} + +static u64 lmb_align_up(u64 addr, u64 size) +{ + return (addr + (size - 1)) & ~(size - 1); +} + +u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) { long i, j; - unsigned long base = 0; + u64 base = 0; BUG_ON(0 == size); -#ifdef CONFIG_PPC32 - /* On 32-bit, make sure we allocate lowmem */ + /* On some platforms, make sure we allocate lowmem */ if (max_addr == LMB_ALLOC_ANYWHERE) - max_addr = __max_low_memory; -#endif + max_addr = LMB_REAL_LIMIT; + for (i = lmb.memory.cnt-1; i >= 0; i--) { - unsigned long lmbbase = lmb.memory.region[i].base; - unsigned long lmbsize = lmb.memory.region[i].size; + u64 lmbbase = lmb.memory.region[i].base; + u64 lmbsize = lmb.memory.region[i].size; if (max_addr == LMB_ALLOC_ANYWHERE) - base = _ALIGN_DOWN(lmbbase + lmbsize - size, align); + base = lmb_align_down(lmbbase + lmbsize - size, align); else if (lmbbase < max_addr) { base = min(lmbbase + lmbsize, max_addr); - base = _ALIGN_DOWN(base - size, align); + base = lmb_align_down(base - size, align); } else continue; while ((lmbbase <= base) && ((j = lmb_overlaps_region(&lmb.reserved, base, size)) >= 0) ) - base = _ALIGN_DOWN(lmb.reserved.region[j].base - size, - align); + base = lmb_align_down(lmb.reserved.region[j].base - size, + align); if ((base != 0) && (lmbbase <= base)) break; @@ -284,18 +295,19 @@ unsigned long __init __lmb_alloc_base(unsigned long size, unsigned long align, if (i < 0) return 0; - lmb_add_region(&lmb.reserved, base, size); + if (lmb_add_region(&lmb.reserved, base, lmb_align_up(size, align)) < 0) + return 0; return base; } /* You must call lmb_analyze() before this. */ -unsigned long __init lmb_phys_mem_size(void) +u64 __init lmb_phys_mem_size(void) { return lmb.memory.size; } -unsigned long __init lmb_end_of_DRAM(void) +u64 __init lmb_end_of_DRAM(void) { int idx = lmb.memory.cnt - 1; @@ -303,9 +315,10 @@ unsigned long __init lmb_end_of_DRAM(void) } /* You must call lmb_analyze() after this. */ -void __init lmb_enforce_memory_limit(unsigned long memory_limit) +void __init lmb_enforce_memory_limit(u64 memory_limit) { - unsigned long i, limit; + unsigned long i; + u64 limit; struct lmb_property *p; if (! memory_limit) @@ -343,13 +356,13 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit) } } -int __init lmb_is_reserved(unsigned long addr) +int __init lmb_is_reserved(u64 addr) { int i; for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long upper = lmb.reserved.region[i].base + - lmb.reserved.region[i].size - 1; + u64 upper = lmb.reserved.region[i].base + + lmb.reserved.region[i].size - 1; if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) return 1; } |