diff options
74 files changed, 606 insertions, 272 deletions
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index 06fc7602593..37b2cafa4e5 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -19,6 +19,9 @@ to deliver its interrupts via SPIs. - clock-frequency : The frequency of the main counter, in Hz. Optional. +- always-on : a boolean property. If present, the timer is powered through an + always-on power domain, therefore it never loses context. + Example: timer { diff --git a/Documentation/devicetree/bindings/ata/apm-xgene.txt b/Documentation/devicetree/bindings/ata/apm-xgene.txt index 7bcfbf59810..a668f0e7d00 100644 --- a/Documentation/devicetree/bindings/ata/apm-xgene.txt +++ b/Documentation/devicetree/bindings/ata/apm-xgene.txt @@ -24,6 +24,7 @@ Required properties: * "sata-phy" for the SATA 6.0Gbps PHY Optional properties: +- dma-coherent : Present if dma operations are coherent - status : Shall be "ok" if enabled or "disabled" if disabled. Default is "ok". @@ -55,6 +56,7 @@ Example: <0x0 0x1f22e000 0x0 0x1000>, <0x0 0x1f227000 0x0 0x1000>; interrupts = <0x0 0x87 0x4>; + dma-coherent; status = "ok"; clocks = <&sataclk 0>; phys = <&phy2 0>; @@ -69,6 +71,7 @@ Example: <0x0 0x1f23e000 0x0 0x1000>, <0x0 0x1f237000 0x0 0x1000>; interrupts = <0x0 0x88 0x4>; + dma-coherent; status = "ok"; clocks = <&sataclk 0>; phys = <&phy3 0>; diff --git a/MAINTAINERS b/MAINTAINERS index 37a36aa9417..10662644212 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5114,14 +5114,19 @@ F: drivers/s390/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR ARM M: Christoffer Dall <christoffer.dall@linaro.org> +M: Marc Zyngier <marc.zyngier@arm.com> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu W: http://systems.cs.columbia.edu/projects/kvm-arm S: Supported F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* F: arch/arm/kvm/ +F: virt/kvm/arm/ +F: include/kvm/arm_* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) +M: Christoffer Dall <christoffer.dall@linaro.org> M: Marc Zyngier <marc.zyngier@arm.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a..4be5bb150bd 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,7 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST - depends on ARM_VIRT_EXT && ARM_LPAE + depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN ---help--- Support hosting virtualized guest machines. You will also need to select one or more of the processor modules below. diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 80bb1e6c2c2..16f804938b8 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -330,7 +332,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -1024,7 +1026,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -1050,8 +1052,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 93f4b2dd924..f8c40a66e65 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -307,6 +307,7 @@ <0x0 0x1f21e000 0x0 0x1000>, <0x0 0x1f217000 0x0 0x1000>; interrupts = <0x0 0x86 0x4>; + dma-coherent; status = "disabled"; clocks = <&sata01clk 0>; phys = <&phy1 0>; @@ -321,6 +322,7 @@ <0x0 0x1f22e000 0x0 0x1000>, <0x0 0x1f227000 0x0 0x1000>; interrupts = <0x0 0x87 0x4>; + dma-coherent; status = "ok"; clocks = <&sata23clk 0>; phys = <&phy2 0>; @@ -334,6 +336,7 @@ <0x0 0x1f23d000 0x0 0x1000>, <0x0 0x1f23e000 0x0 0x1000>; interrupts = <0x0 0x88 0x4>; + dma-coherent; status = "ok"; clocks = <&sata45clk 0>; phys = <&phy3 0>; diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index ffbbdde7aba..2dc36d00add 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -143,10 +143,8 @@ static int __init setup_early_printk(char *buf) } /* no options parsing yet */ - if (paddr) { - set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); - early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); - } + if (paddr) + early_base = (void __iomem *)set_fixmap_offset_io(FIX_EARLYCON_MEM_BASE, paddr); printch = match->printch; early_console = &early_console_dev; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93e7df8968f..7ec784653b2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -396,7 +396,7 @@ static int __init arm64_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -arch_initcall(arm64_device_init); +arch_initcall_sync(arm64_device_init); static DEFINE_PER_CPU(struct cpu, cpu_data); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0ba347e59f0..c851eb44dc5 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -22,8 +22,11 @@ #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> +#include <linux/of.h> +#include <linux/platform_device.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> +#include <linux/amba/bus.h> #include <asm/cacheflush.h> @@ -305,17 +308,45 @@ struct dma_map_ops coherent_swiotlb_dma_ops = { }; EXPORT_SYMBOL(coherent_swiotlb_dma_ops); +static int dma_bus_notifier(struct notifier_block *nb, + unsigned long event, void *_dev) +{ + struct device *dev = _dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (of_property_read_bool(dev->of_node, "dma-coherent")) + set_dma_ops(dev, &coherent_swiotlb_dma_ops); + + return NOTIFY_OK; +} + +static struct notifier_block platform_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + +static struct notifier_block amba_bus_nb = { + .notifier_call = dma_bus_notifier, +}; + extern int swiotlb_late_init_with_default_size(size_t default_size); static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); - dma_ops = &coherent_swiotlb_dma_ops; + /* + * These must be registered before of_platform_populate(). + */ + bus_register_notifier(&platform_bus_type, &platform_bus_nb); + bus_register_notifier(&amba_bustype, &amba_bus_nb); + + dma_ops = &noncoherent_swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); } -subsys_initcall(swiotlb_late_init); +arch_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6b7e89569a3..0a472c41a67 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -374,6 +374,9 @@ int kern_addr_valid(unsigned long addr) if (pmd_none(*pmd)) return 0; + if (pmd_sect(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) return 0; diff --git a/arch/hexagon/include/asm/barrier.h b/arch/hexagon/include/asm/barrier.h deleted file mode 100644 index 4e863daea25..00000000000 --- a/arch/hexagon/include/asm/barrier.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Memory barrier definitions for the Hexagon architecture - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef _ASM_BARRIER_H -#define _ASM_BARRIER_H - -#define rmb() barrier() -#define read_barrier_depends() barrier() -#define wmb() barrier() -#define mb() barrier() -#define smp_rmb() barrier() -#define smp_read_barrier_depends() barrier() -#define smp_wmb() barrier() -#define smp_mb() barrier() - -/* Set a value and use a memory barrier. Used by the scheduler somewhere. */ -#define set_mb(var, value) \ - do { var = value; mb(); } while (0) - -#endif /* _ASM_BARRIER_H */ diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild index a580642555b..348356c9951 100644 --- a/arch/parisc/include/uapi/asm/Kbuild +++ b/arch/parisc/include/uapi/asm/Kbuild @@ -1,6 +1,8 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += resource.h + header-y += bitsperlong.h header-y += byteorder.h header-y += errno.h @@ -13,7 +15,6 @@ header-y += msgbuf.h header-y += pdc.h header-y += posix_types.h header-y += ptrace.h -header-y += resource.h header-y += sembuf.h header-y += setup.h header-y += shmbuf.h diff --git a/arch/parisc/include/uapi/asm/resource.h b/arch/parisc/include/uapi/asm/resource.h deleted file mode 100644 index 8b06343b62e..00000000000 --- a/arch/parisc/include/uapi/asm/resource.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_PARISC_RESOURCE_H -#define _ASM_PARISC_RESOURCE_H - -#define _STK_LIM_MAX 10 * _STK_LIM -#include <asm-generic/resource.h> - -#endif diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9c36dc398f9..452d3ebd9d0 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -276,7 +276,6 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) case BPF_S_LD_W_IND: case BPF_S_LD_H_IND: case BPF_S_LD_B_IND: - case BPF_S_LDX_B_MSH: case BPF_S_LD_IMM: case BPF_S_LD_MEM: case BPF_S_MISC_TXA: diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d1b7c377a23..ce6ad7e6a7d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -83,7 +83,9 @@ else KBUILD_CFLAGS += -m64 # Don't autogenerate traditional x87, MMX or SSE instructions - KBUILD_CFLAGS += -mno-mmx -mno-sse -mno-80387 -mno-fp-ret-in-387 + KBUILD_CFLAGS += -mno-mmx -mno-sse + KBUILD_CFLAGS += $(call cc-option,-mno-80387) + KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6ad4658de70..d23aa82e7a7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void) return nr_irqs_gsi; } +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < nr_irqs_gsi ? nr_irqs_gsi : from; +} + int __init arch_probe_nr_irqs(void) { int nr; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 7c87424d414..619f7699487 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -543,7 +543,8 @@ static int rapl_cpu_prepare(int cpu) if (phys_id < 0) return -1; - if (!rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) + /* protect rdmsrl() to handle virtualization */ + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) return -1; pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index f6584a90aba..5edc34b5b95 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,6 +26,9 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 +/* Flag below is initialized once during vSMP PCI initialization. */ +static int irq_routing_comply = 1; + #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: @@ -101,6 +104,10 @@ static void __init set_vsmp_pv_ops(void) #ifdef CONFIG_SMP if (cap & ctl & BIT(8)) { ctl &= ~BIT(8); + + /* Interrupt routing set to ignore */ + irq_routing_comply = 0; + #ifdef CONFIG_PROC_FS /* Don't let users change irq affinity via procfs */ no_irq_affinity = 1; @@ -218,7 +225,9 @@ static void vsmp_apic_post_init(void) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; - apic->vector_allocation_domain = fill_vector_allocation_domain; + + if (!irq_routing_comply) + apic->vector_allocation_domain = fill_vector_allocation_domain; } void __init vsmp_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f68c583192..33e8c028842 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -503,7 +503,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) [number##_HIGH] = VMCS12_OFFSET(name)+4 -static const unsigned long shadow_read_only_fields[] = { +static unsigned long shadow_read_only_fields[] = { /* * We do NOT shadow fields that are modified when L0 * traps and emulates any vmx instruction (e.g. VMPTRLD, @@ -526,10 +526,10 @@ static const unsigned long shadow_read_only_fields[] = { GUEST_LINEAR_ADDRESS, GUEST_PHYSICAL_ADDRESS }; -static const int max_shadow_read_only_fields = +static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); -static const unsigned long shadow_read_write_fields[] = { +static unsigned long shadow_read_write_fields[] = { GUEST_RIP, GUEST_RSP, GUEST_CR0, @@ -558,7 +558,7 @@ static const unsigned long shadow_read_write_fields[] = { HOST_FS_SELECTOR, HOST_GS_SELECTOR }; -static const int max_shadow_read_write_fields = +static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields); static const unsigned short vmcs_field_to_offset_table[] = { @@ -3009,6 +3009,41 @@ static void free_kvm_area(void) } } +static void init_vmcs_shadow_fields(void) +{ + int i, j; + + /* No checks for read only fields yet */ + + for (i = j = 0; i < max_shadow_read_write_fields; i++) { + switch (shadow_read_write_fields[i]) { + case GUEST_BNDCFGS: + if (!vmx_mpx_supported()) + continue; + break; + default: + break; + } + + if (j < i) + shadow_read_write_fields[j] = + shadow_read_write_fields[i]; + j++; + } + max_shadow_read_write_fields = j; + + /* shadowed fields guest access without vmexit */ + for (i = 0; i < max_shadow_read_write_fields; i++) { + clear_bit(shadow_read_write_fields[i], + vmx_vmwrite_bitmap); + clear_bit(shadow_read_write_fields[i], + vmx_vmread_bitmap); + } + for (i = 0; i < max_shadow_read_only_fields; i++) + clear_bit(shadow_read_only_fields[i], + vmx_vmread_bitmap); +} + static __init int alloc_kvm_area(void) { int cpu; @@ -3039,6 +3074,8 @@ static __init int hardware_setup(void) enable_vpid = 0; if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels()) { @@ -8803,14 +8840,6 @@ static int __init vmx_init(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* shadowed read/write fields */ - for (i = 0; i < max_shadow_read_write_fields; i++) { - clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); - clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); - } - /* shadowed read only fields */ - for (i = 0; i < max_shadow_read_only_fields; i++) - clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); /* * Allow direct access to the PC debug port (it is often used for I/O diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index c29c2c3ec0a..b06f5f55ada 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -170,6 +170,9 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) acpi_status status; int ret; + if (pr->apic_id == -1) + return -ENODEV; + status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT)) return -ENODEV; @@ -260,10 +263,8 @@ static int acpi_processor_get_info(struct acpi_device *device) } apic_id = acpi_get_apicid(pr->handle, device_declaration, pr->acpi_id); - if (apic_id < 0) { + if (apic_id < 0) acpi_handle_debug(pr->handle, "failed to get CPU APIC ID.\n"); - return -ENODEV; - } pr->apic_id = apic_id; cpu_index = acpi_map_cpuid(pr->apic_id, pr->acpi_id); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index d7d32c28829..ad11ba4a412 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -206,13 +206,13 @@ unlock: spin_unlock_irqrestore(&ec->lock, flags); } -static int acpi_ec_sync_query(struct acpi_ec *ec); +static int acpi_ec_sync_query(struct acpi_ec *ec, u8 *data); static int ec_check_sci_sync(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) - return acpi_ec_sync_query(ec); + return acpi_ec_sync_query(ec, NULL); } return 0; } @@ -443,10 +443,8 @@ acpi_handle ec_get_handle(void) EXPORT_SYMBOL(ec_get_handle); -static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 *data); - /* - * Clears stale _Q events that might have accumulated in the EC. + * Process _Q events that might have accumulated in the EC. * Run with locked ec mutex. */ static void acpi_ec_clear(struct acpi_ec *ec) @@ -455,7 +453,7 @@ static void acpi_ec_clear(struct acpi_ec *ec) u8 value = 0; for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) { - status = acpi_ec_query_unlocked(ec, &value); + status = acpi_ec_sync_query(ec, &value); if (status || !value) break; } @@ -582,13 +580,18 @@ static void acpi_ec_run(void *cxt) kfree(handler); } -static int acpi_ec_sync_query(struct acpi_ec *ec) +static int acpi_ec_sync_query(struct acpi_ec *ec, u8 *data) { u8 value = 0; int status; struct acpi_ec_query_handler *handler, *copy; - if ((status = acpi_ec_query_unlocked(ec, &value))) + + status = acpi_ec_query_unlocked(ec, &value); + if (data) + *data = value; + if (status) return status; + list_for_each_entry(handler, &ec->list, node) { if (value == handler->query_bit) { /* have custom handler for this bit */ @@ -612,7 +615,7 @@ static void acpi_ec_gpe_query(void *ec_cxt) if (!ec) return; mutex_lock(&ec->mutex); - acpi_ec_sync_query(ec); + acpi_ec_sync_query(ec, NULL); mutex_unlock(&ec->mutex); } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 8986b9f2278..62ec61e8f84 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -52,6 +52,7 @@ static DEFINE_MUTEX(deferred_probe_mutex); static LIST_HEAD(deferred_probe_pending_list); static LIST_HEAD(deferred_probe_active_list); static struct workqueue_struct *deferred_wq; +static atomic_t deferred_trigger_count = ATOMIC_INIT(0); /** * deferred_probe_work_func() - Retry probing devices in the active list. @@ -135,6 +136,17 @@ static bool driver_deferred_probe_enable = false; * This functions moves all devices from the pending list to the active * list and schedules the deferred probe workqueue to process them. It * should be called anytime a driver is successfully bound to a device. + * + * Note, there is a race condition in multi-threaded probe. In the case where + * more than one device is probing at the same time, it is possible for one + * probe to complete successfully while another is about to defer. If the second + * depends on the first, then it will get put on the pending list after the + * trigger event has already occured and will be stuck there. + * + * The atomic 'deferred_trigger_count' is used to determine if a successful + * trigger has occurred in the midst of probing a driver. If the trigger count + * changes in the midst of a probe, then deferred processing should be triggered + * again. */ static void driver_deferred_probe_trigger(void) { @@ -147,6 +159,7 @@ static void driver_deferred_probe_trigger(void) * into the active list so they can be retried by the workqueue */ mutex_lock(&deferred_probe_mutex); + atomic_inc(&deferred_trigger_count); list_splice_tail_init(&deferred_probe_pending_list, &deferred_probe_active_list); mutex_unlock(&deferred_probe_mutex); @@ -265,6 +278,7 @@ static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); static int really_probe(struct device *dev, struct device_driver *drv) { int ret = 0; + int local_trigger_count = atomic_read(&deferred_trigger_count); atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", @@ -310,6 +324,9 @@ probe_failed: /* Driver requested deferred probing */ dev_info(dev, "Driver %s requests probe deferral\n", drv->name); driver_deferred_probe_add(dev); + /* Did a trigger occur while probing? Need to re-trigger if yes */ + if (local_trigger_count != atomic_read(&deferred_trigger_count)) + driver_deferred_probe_trigger(); } else if (ret != -ENODEV && ret != -ENXIO) { /* driver matched but the probe failed */ printk(KERN_WARNING diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c index a535c7bf857..422391242b3 100644 --- a/drivers/clk/versatile/clk-vexpress-osc.c +++ b/drivers/clk/versatile/clk-vexpress-osc.c @@ -100,6 +100,8 @@ void __init vexpress_osc_of_setup(struct device_node *node) struct clk *clk; u32 range[2]; + vexpress_sysreg_of_early_init(); + osc = kzalloc(sizeof(*osc), GFP_KERNEL); if (!osc) return; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 57e823c44d2..5163ec13429 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -66,6 +66,7 @@ static int arch_timer_ppi[MAX_TIMER_PPI]; static struct clock_event_device __percpu *arch_timer_evt; static bool arch_timer_use_virtual = true; +static bool arch_timer_c3stop; static bool arch_timer_mem_use_virtual; /* @@ -263,7 +264,8 @@ static void __arch_timer_setup(unsigned type, clk->features = CLOCK_EVT_FEAT_ONESHOT; if (type == ARCH_CP15_TIMER) { - clk->features |= CLOCK_EVT_FEAT_C3STOP; + if (arch_timer_c3stop) + clk->features |= CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; clk->rating = 450; clk->cpumask = cpumask_of(smp_processor_id()); @@ -665,6 +667,8 @@ static void __init arch_timer_init(struct device_node *np) } } + arch_timer_c3stop = !of_property_read_bool(np, "always-on"); + arch_timer_register(); arch_timer_common_init(); } diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c index ca81809d159..7ce442148c3 100644 --- a/drivers/clocksource/zevio-timer.c +++ b/drivers/clocksource/zevio-timer.c @@ -212,4 +212,9 @@ error_free: return ret; } -CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_add); +static void __init zevio_timer_init(struct device_node *node) +{ + BUG_ON(zevio_timer_add(node)); +} + +CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index d00e5d1abd2..5c4369b5d83 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -242,7 +242,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index, * Sets a new clock ratio. */ -static void longhaul_setstate(struct cpufreq_policy *policy, +static int longhaul_setstate(struct cpufreq_policy *policy, unsigned int table_index) { unsigned int mults_index; @@ -258,10 +258,12 @@ static void longhaul_setstate(struct cpufreq_policy *policy, /* Safety precautions */ mult = mults[mults_index & 0x1f]; if (mult == -1) - return; + return -EINVAL; + speed = calc_speed(mult); if ((speed > highest_speed) || (speed < lowest_speed)) - return; + return -EINVAL; + /* Voltage transition before frequency transition? */ if (can_scale_voltage && longhaul_index < table_index) dir = 1; @@ -269,8 +271,6 @@ static void longhaul_setstate(struct cpufreq_policy *policy, freqs.old = calc_speed(longhaul_get_cpu_mult()); freqs.new = speed; - cpufreq_freq_transition_begin(policy, &freqs); - pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); retry_loop: @@ -385,12 +385,14 @@ retry_loop: goto retry_loop; } } - /* Report true CPU frequency */ - cpufreq_freq_transition_end(policy, &freqs, 0); - if (!bm_timeout) + if (!bm_timeout) { printk(KERN_INFO PFX "Warning: Timeout while waiting for " "idle PCI bus.\n"); + return -EBUSY; + } + + return 0; } /* @@ -631,9 +633,10 @@ static int longhaul_target(struct cpufreq_policy *policy, unsigned int i; unsigned int dir = 0; u8 vid, current_vid; + int retval = 0; if (!can_scale_voltage) - longhaul_setstate(policy, table_index); + retval = longhaul_setstate(policy, table_index); else { /* On test system voltage transitions exceeding single * step up or down were turning motherboard off. Both @@ -648,7 +651,7 @@ static int longhaul_target(struct cpufreq_policy *policy, while (i != table_index) { vid = (longhaul_table[i].driver_data >> 8) & 0x1f; if (vid != current_vid) { - longhaul_setstate(policy, i); + retval = longhaul_setstate(policy, i); current_vid = vid; msleep(200); } @@ -657,10 +660,11 @@ static int longhaul_target(struct cpufreq_policy *policy, else i--; } - longhaul_setstate(policy, table_index); + retval = longhaul_setstate(policy, table_index); } + longhaul_index = table_index; - return 0; + return retval; } @@ -968,7 +972,15 @@ static void __exit longhaul_exit(void) for (i = 0; i < numscales; i++) { if (mults[i] == maxmult) { + struct cpufreq_freqs freqs; + + freqs.old = policy->cur; + freqs.new = longhaul_table[i].frequency; + freqs.flags = 0; + + cpufreq_freq_transition_begin(policy, &freqs); longhaul_setstate(policy, i); + cpufreq_freq_transition_end(policy, &freqs, 0); break; } } diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index 49f120e1bc7..78904e6ca4a 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -138,22 +138,14 @@ static void powernow_k6_set_cpu_multiplier(unsigned int best_i) static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int best_i) { - struct cpufreq_freqs freqs; if (clock_ratio[best_i].driver_data > max_multiplier) { printk(KERN_ERR PFX "invalid target frequency\n"); return -EINVAL; } - freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); - freqs.new = busfreq * clock_ratio[best_i].driver_data; - - cpufreq_freq_transition_begin(policy, &freqs); - powernow_k6_set_cpu_multiplier(best_i); - cpufreq_freq_transition_end(policy, &freqs, 0); - return 0; } @@ -227,9 +219,20 @@ have_busfreq: static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) { unsigned int i; - for (i = 0; i < 8; i++) { - if (i == max_multiplier) + + for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + if (clock_ratio[i].driver_data == max_multiplier) { + struct cpufreq_freqs freqs; + + freqs.old = policy->cur; + freqs.new = clock_ratio[i].frequency; + freqs.flags = 0; + + cpufreq_freq_transition_begin(policy, &freqs); powernow_k6_target(policy, i); + cpufreq_freq_transition_end(policy, &freqs, 0); + break; + } } return 0; } diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index f911645c3f6..e61e224475a 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -269,8 +269,6 @@ static int powernow_target(struct cpufreq_policy *policy, unsigned int index) freqs.new = powernow_table[index].frequency; - cpufreq_freq_transition_begin(policy, &freqs); - /* Now do the magic poking into the MSRs. */ if (have_a0 == 1) /* A0 errata 5 */ @@ -290,8 +288,6 @@ static int powernow_target(struct cpufreq_policy *policy, unsigned int index) if (have_a0 == 1) local_irq_enable(); - cpufreq_freq_transition_end(policy, &freqs, 0); - return 0; } diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c index a1ca3dd04a8..0af618abeba 100644 --- a/drivers/cpufreq/ppc-corenet-cpufreq.c +++ b/drivers/cpufreq/ppc-corenet-cpufreq.c @@ -138,6 +138,7 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *table; struct cpu_data *data; unsigned int cpu = policy->cpu; + u64 transition_latency_hz; np = of_get_cpu_node(cpu, NULL); if (!np) @@ -205,8 +206,10 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) for_each_cpu(i, per_cpu(cpu_mask, cpu)) per_cpu(cpu_data, i) = data; + transition_latency_hz = 12ULL * NSEC_PER_SEC; policy->cpuinfo.transition_latency = - (12ULL * NSEC_PER_SEC) / fsl_get_sys_freq(); + do_div(transition_latency_hz, fsl_get_sys_freq()); + of_node_put(np); return 0; diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 6d02e3b0637..d76f0b70c6e 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -365,12 +365,12 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) if (cpu_has_tjmax(c)) dev_warn(dev, "Unable to read TjMax from CPU %u\n", id); } else { - val = (eax >> 16) & 0x7f; + val = (eax >> 16) & 0xff; /* * If the TjMax is not plausible, an assumption * will be used */ - if (val >= 85) { + if (val) { dev_dbg(dev, "TjMax is %d degrees C\n", val); return val * 1000; } diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index d4e8983fba5..23f38cf2c5c 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_CXGB4 - tristate "Chelsio T4 RDMA Driver" + tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) select GENERIC_ALLOCATOR ---help--- - This is an iWARP/RDMA driver for the Chelsio T4 1GbE and - 10GbE adapters. + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 185452abf32..1f863a96a48 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,6 +587,10 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { @@ -996,7 +1000,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -1154,7 +1158,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return credits; } -static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; @@ -1164,6 +1168,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; + int disconnect = 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -1173,7 +1178,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * will abort the connection. */ if (stop_ep_timer(ep)) - return; + return 0; /* * If we get more than the supported amount of private data @@ -1195,7 +1200,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ @@ -1235,7 +1240,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; + return 0; if (mpa->flags & MPA_REJECT) { err = -ECONNREFUSED; @@ -1337,9 +1342,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } @@ -1355,9 +1362,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } goto out; @@ -1366,7 +1375,7 @@ err: send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); - return; + return disconnect; } static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) @@ -1524,6 +1533,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; __u8 status = hdr->status; + int disconnect = 0; ep = lookup_tid(t, tid); if (!ep) @@ -1539,7 +1549,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; - process_mpa_reply(ep, skb); + disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: ep->rcv_seq += dlen; @@ -1555,13 +1565,16 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; break; } default: break; } mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); return 0; } @@ -2009,6 +2022,10 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN(1); } + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -3482,9 +3499,9 @@ static void process_timeout(struct c4iw_ep *ep) __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7b8c5806a09..7474b490760 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -435,6 +435,7 @@ struct c4iw_qp_attributes { u8 ecode; u16 sq_db_inc; u16 rq_db_inc; + u8 send_term; }; struct c4iw_qp { diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7b5114cb486..086f62f5dc9 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1388,11 +1388,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; - disconnect = 1; - c4iw_get_ep(&qhp->ep->com); - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - else { + disconnect = 1; + } else { + terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; @@ -1776,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index dc193c29267..6121ca08fe5 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -836,4 +836,18 @@ struct ulptx_idata { #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) +enum { /* TCP congestion control algorithms */ + CONG_ALG_RENO, + CONG_ALG_TAHOE, + CONG_ALG_NEWRENO, + CONG_ALG_HIGHSPEED +}; + +#define S_CONG_CNTRL 14 +#define M_CONG_CNTRL 0x3 +#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) +#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) + +#define T5_OPT_2_VALID (1 << 31) + #endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 41be897df8d..3899ba7821c 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -41,6 +41,7 @@ #define ARMADA_370_XP_INT_SET_ENABLE_OFFS (0x30) #define ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS (0x34) #define ARMADA_370_XP_INT_SOURCE_CTL(irq) (0x100 + irq*4) +#define ARMADA_370_XP_INT_SOURCE_CPU_MASK 0xF #define ARMADA_370_XP_CPU_INTACK_OFFS (0x44) #define ARMADA_375_PPI_CAUSE (0x10) @@ -132,8 +133,7 @@ static int armada_370_xp_setup_msi_irq(struct msi_chip *chip, struct msi_desc *desc) { struct msi_msg msg; - irq_hw_number_t hwirq; - int virq; + int virq, hwirq; hwirq = armada_370_xp_alloc_msi(); if (hwirq < 0) @@ -159,8 +159,19 @@ static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip, unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); + unsigned long hwirq = d->hwirq; + irq_dispose_mapping(irq); - armada_370_xp_free_msi(d->hwirq); + armada_370_xp_free_msi(hwirq); +} + +static int armada_370_xp_check_msi_device(struct msi_chip *chip, struct pci_dev *dev, + int nvec, int type) +{ + /* We support MSI, but not MSI-X */ + if (type == PCI_CAP_ID_MSI) + return 0; + return -EINVAL; } static struct irq_chip armada_370_xp_msi_irq_chip = { @@ -201,6 +212,7 @@ static int armada_370_xp_msi_init(struct device_node *node, msi_chip->setup_irq = armada_370_xp_setup_msi_irq; msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq; + msi_chip->check_device = armada_370_xp_check_msi_device; msi_chip->of_node = node; armada_370_xp_msi_domain = @@ -244,35 +256,18 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock); static int armada_xp_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { - unsigned long reg; - unsigned long new_mask = 0; - unsigned long online_mask = 0; - unsigned long count = 0; irq_hw_number_t hwirq = irqd_to_hwirq(d); + unsigned long reg, mask; int cpu; - for_each_cpu(cpu, mask_val) { - new_mask |= 1 << cpu_logical_map(cpu); - count++; - } - - /* - * Forbid mutlicore interrupt affinity - * This is required since the MPIC HW doesn't limit - * several CPUs from acknowledging the same interrupt. - */ - if (count > 1) - return -EINVAL; - - for_each_cpu(cpu, cpu_online_mask) - online_mask |= 1 << cpu_logical_map(cpu); + /* Select a single core from the affinity mask which is online */ + cpu = cpumask_any_and(mask_val, cpu_online_mask); + mask = 1UL << cpu_logical_map(cpu); raw_spin_lock(&irq_controller_lock); - reg = readl(main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq)); - reg = (reg & (~online_mask)) | new_mask; + reg = (reg & (~ARMADA_370_XP_INT_SOURCE_CPU_MASK)) | mask; writel(reg, main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq)); - raw_spin_unlock(&irq_controller_lock); return 0; @@ -494,15 +489,6 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, #ifdef CONFIG_SMP armada_xp_mpic_smp_cpu_init(); - - /* - * Set the default affinity from all CPUs to the boot cpu. - * This is required since the MPIC doesn't limit several CPUs - * from acknowledging the same interrupt. - */ - cpumask_clear(irq_default_affinity); - cpumask_set_cpu(smp_processor_id(), irq_default_affinity); - #endif armada_370_xp_msi_init(node, main_int_res.start); diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index fc817d28d1f..3d15d16a708 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -107,7 +107,7 @@ static int __init crossbar_of_init(struct device_node *node) int i, size, max, reserved = 0, entry; const __be32 *irqsr; - cb = kzalloc(sizeof(struct cb_device *), GFP_KERNEL); + cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1bf4a71919e..9380be7b189 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2488,6 +2488,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) } else { inc_hit_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && !is_dirty(cache, lookup_result.cblock)) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 53728be84de..13abade76ad 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -232,6 +232,13 @@ struct thin_c { struct bio_list deferred_bio_list; struct bio_list retry_on_resume_list; struct rb_root sort_bio_list; /* sorted list of deferred bios */ + + /* + * Ensures the thin is not destroyed until the worker has finished + * iterating the active_thins list. + */ + atomic_t refcount; + struct completion can_destroy; }; /*----------------------------------------------------------------*/ @@ -1486,6 +1493,45 @@ static void process_thin_deferred_bios(struct thin_c *tc) blk_finish_plug(&plug); } +static void thin_get(struct thin_c *tc); +static void thin_put(struct thin_c *tc); + +/* + * We can't hold rcu_read_lock() around code that can block. So we + * find a thin with the rcu lock held; bump a refcount; then drop + * the lock. + */ +static struct thin_c *get_first_thin(struct pool *pool) +{ + struct thin_c *tc = NULL; + + rcu_read_lock(); + if (!list_empty(&pool->active_thins)) { + tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list); + thin_get(tc); + } + rcu_read_unlock(); + + return tc; +} + +static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc) +{ + struct thin_c *old_tc = tc; + + rcu_read_lock(); + list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) { + thin_get(tc); + thin_put(old_tc); + rcu_read_unlock(); + return tc; + } + thin_put(old_tc); + rcu_read_unlock(); + + return NULL; +} + static void process_deferred_bios(struct pool *pool) { unsigned long flags; @@ -1493,10 +1539,11 @@ static void process_deferred_bios(struct pool *pool) struct bio_list bios; struct thin_c *tc; - rcu_read_lock(); - list_for_each_entry_rcu(tc, &pool->active_thins, list) + tc = get_first_thin(pool); + while (tc) { process_thin_deferred_bios(tc); - rcu_read_unlock(); + tc = get_next_thin(pool, tc); + } /* * If there are any deferred flush bios, we must commit @@ -1578,7 +1625,7 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) { struct noflush_work w; - INIT_WORK(&w.worker, fn); + INIT_WORK_ONSTACK(&w.worker, fn); w.tc = tc; atomic_set(&w.complete, 0); init_waitqueue_head(&w.wait); @@ -3061,11 +3108,25 @@ static struct target_type pool_target = { /*---------------------------------------------------------------- * Thin target methods *--------------------------------------------------------------*/ +static void thin_get(struct thin_c *tc) +{ + atomic_inc(&tc->refcount); +} + +static void thin_put(struct thin_c *tc) +{ + if (atomic_dec_and_test(&tc->refcount)) + complete(&tc->can_destroy); +} + static void thin_dtr(struct dm_target *ti) { struct thin_c *tc = ti->private; unsigned long flags; + thin_put(tc); + wait_for_completion(&tc->can_destroy); + spin_lock_irqsave(&tc->pool->lock, flags); list_del_rcu(&tc->list); spin_unlock_irqrestore(&tc->pool->lock, flags); @@ -3101,6 +3162,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) struct thin_c *tc; struct dm_dev *pool_dev, *origin_dev; struct mapped_device *pool_md; + unsigned long flags; mutex_lock(&dm_thin_pool_table.mutex); @@ -3191,9 +3253,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) mutex_unlock(&dm_thin_pool_table.mutex); - spin_lock(&tc->pool->lock); + atomic_set(&tc->refcount, 1); + init_completion(&tc->can_destroy); + + spin_lock_irqsave(&tc->pool->lock, flags); list_add_tail_rcu(&tc->list, &tc->pool->active_thins); - spin_unlock(&tc->pool->lock); + spin_unlock_irqrestore(&tc->pool->lock, flags); /* * This synchronize_rcu() call is needed here otherwise we risk a * wake_worker() call finding no bios to process (because the newly diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 796007a5e0e..7a7bab8947a 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -330,15 +330,17 @@ test_block_hash: return r; } } - todo = 1 << v->data_dev_block_bits; - while (io->iter.bi_size) { + do { u8 *page; + unsigned len; struct bio_vec bv = bio_iter_iovec(bio, io->iter); page = kmap_atomic(bv.bv_page); - r = crypto_shash_update(desc, page + bv.bv_offset, - bv.bv_len); + len = bv.bv_len; + if (likely(len >= todo)) + len = todo; + r = crypto_shash_update(desc, page + bv.bv_offset, len); kunmap_atomic(page); if (r < 0) { @@ -346,8 +348,9 @@ test_block_hash: return r; } - bio_advance_iter(bio, &io->iter, bv.bv_len); - } + bio_advance_iter(bio, &io->iter, len); + todo -= len; + } while (todo); if (!v->version) { r = crypto_shash_update(desc, v->salt, v->salt_size); diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 92ed4b2e3c0..c862f9c0e9c 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -64,7 +64,6 @@ struct as3722_pin_function { }; struct as3722_gpio_pin_control { - bool enable_gpio_invert; unsigned mode_prop; int io_function; }; @@ -320,10 +319,8 @@ static int as3722_pinctrl_gpio_set_direction(struct pinctrl_dev *pctldev, return mode; } - if (as_pci->gpio_control[offset].enable_gpio_invert) - mode |= AS3722_GPIO_INV; - - return as3722_write(as3722, AS3722_GPIOn_CONTROL_REG(offset), mode); + return as3722_update_bits(as3722, AS3722_GPIOn_CONTROL_REG(offset), + AS3722_GPIO_MODE_MASK, mode); } static const struct pinmux_ops as3722_pinmux_ops = { @@ -496,10 +493,18 @@ static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, { struct as3722_pctrl_info *as_pci = to_as_pci(chip); struct as3722 *as3722 = as_pci->as3722; - int en_invert = as_pci->gpio_control[offset].enable_gpio_invert; + int en_invert; u32 val; int ret; + ret = as3722_read(as3722, AS3722_GPIOn_CONTROL_REG(offset), &val); + if (ret < 0) { + dev_err(as_pci->dev, + "GPIO_CONTROL%d_REG read failed: %d\n", offset, ret); + return; + } + en_invert = !!(val & AS3722_GPIO_INV); + if (value) val = (en_invert) ? 0 : AS3722_GPIOn_SIGNAL(offset); else diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 81075f2a1d3..2960557bfed 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -810,6 +810,7 @@ static const struct pinconf_ops pcs_pinconf_ops = { static int pcs_add_pin(struct pcs_device *pcs, unsigned offset, unsigned pin_pos) { + struct pcs_soc_data *pcs_soc = &pcs->socdata; struct pinctrl_pin_desc *pin; struct pcs_name *pn; int i; @@ -821,6 +822,18 @@ static int pcs_add_pin(struct pcs_device *pcs, unsigned offset, return -ENOMEM; } + if (pcs_soc->irq_enable_mask) { + unsigned val; + + val = pcs->read(pcs->base + offset); + if (val & pcs_soc->irq_enable_mask) { + dev_dbg(pcs->dev, "irq enabled at boot for pin at %lx (%x), clearing\n", + (unsigned long)pcs->res->start + offset, val); + val &= ~pcs_soc->irq_enable_mask; + pcs->write(val, pcs->base + offset); + } + } + pin = &pcs->pins.pa[i]; pn = &pcs->names[i]; sprintf(pn->name, "%lx.%d", diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index c5e0f6973a3..26ca6855f47 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -629,9 +629,8 @@ static int tb10x_gpio_request_enable(struct pinctrl_dev *pctl, */ for (i = 0; i < state->pinfuncgrpcnt; i++) { const struct tb10x_pinfuncgrp *pfg = &state->pingroups[i]; - unsigned int port = pfg->port; unsigned int mode = pfg->mode; - int j; + int j, port = pfg->port; /* * Skip pin groups which are always mapped and don't need diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c index 48093719167..f5cd3f96180 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c @@ -4794,8 +4794,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_MSIOF0_SCK_B, 0, /* IP5_23_21 [3] */ FN_WE1_N, FN_IERX, FN_CAN1_RX, FN_VI1_G4, - FN_VI1_G4_B, FN_VI2_R6, FN_SCIFA0_CTS_N_B, - FN_IERX_C, 0, + FN_VI1_G4_B, FN_VI2_R6, FN_SCIFA0_CTS_N_B, FN_IERX_C, /* IP5_20_18 [3] */ FN_WE0_N, FN_IECLK, FN_CAN_CLK, FN_VI2_VSYNC_N, FN_SCIFA0_TXD_B, FN_VI2_VSYNC_N_B, 0, 0, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index 5186d70c49d..7868bf3a0f9 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -5288,7 +5288,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* SEL_SCIF3 [2] */ FN_SEL_SCIF3_0, FN_SEL_SCIF3_1, FN_SEL_SCIF3_2, FN_SEL_SCIF3_3, /* SEL_IEB [2] */ - FN_SEL_IEB_0, FN_SEL_IEB_1, FN_SEL_IEB_2, + FN_SEL_IEB_0, FN_SEL_IEB_1, FN_SEL_IEB_2, 0, /* SEL_MMC [1] */ FN_SEL_MMC_0, FN_SEL_MMC_1, /* SEL_SCIF5 [1] */ diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9f611cbbc29..c31aa07b3ba 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -83,8 +83,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev) { struct acpi_device *acpi_dev; acpi_handle handle; - struct acpi_buffer buffer; - int ret; + int ret = 0; pnp_dbg(&dev->dev, "set resources\n"); @@ -97,19 +96,26 @@ static int pnpacpi_set_resources(struct pnp_dev *dev) if (WARN_ON_ONCE(acpi_dev != dev->data)) dev->data = acpi_dev; - ret = pnpacpi_build_resource_template(dev, &buffer); - if (ret) - return ret; - ret = pnpacpi_encode_resources(dev, &buffer); - if (ret) { + if (acpi_has_method(handle, METHOD_NAME__SRS)) { + struct acpi_buffer buffer; + + ret = pnpacpi_build_resource_template(dev, &buffer); + if (ret) + return ret; + + ret = pnpacpi_encode_resources(dev, &buffer); + if (!ret) { + acpi_status status; + + status = acpi_set_current_resources(handle, &buffer); + if (ACPI_FAILURE(status)) + ret = -EIO; + } kfree(buffer.pointer); - return ret; } - if (ACPI_FAILURE(acpi_set_current_resources(handle, &buffer))) - ret = -EINVAL; - else if (acpi_bus_power_manageable(handle)) + if (!ret && acpi_bus_power_manageable(handle)) ret = acpi_bus_set_power(handle, ACPI_STATE_D0); - kfree(buffer.pointer); + return ret; } @@ -117,7 +123,7 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev) { struct acpi_device *acpi_dev; acpi_handle handle; - int ret; + acpi_status status; dev_dbg(&dev->dev, "disable resources\n"); @@ -128,13 +134,15 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev) } /* acpi_unregister_gsi(pnp_irq(dev, 0)); */ - ret = 0; if (acpi_bus_power_manageable(handle)) acpi_bus_set_power(handle, ACPI_STATE_D3_COLD); - /* continue even if acpi_bus_set_power() fails */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_DIS", NULL, NULL))) - ret = -ENODEV; - return ret; + + /* continue even if acpi_bus_set_power() fails */ + status = acpi_evaluate_object(handle, "_DIS", NULL, NULL); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) + return -ENODEV; + + return 0; } #ifdef CONFIG_ACPI_SLEEP diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 3736bc408ad..ebf0d6710b5 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -335,7 +335,7 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev) } #endif -#ifdef CONFIG_X86 +#ifdef CONFIG_PCI /* Device IDs of parts that have 32KB MCH space */ static const unsigned int mch_quirk_devices[] = { 0x0154, /* Ivy Bridge */ @@ -440,7 +440,7 @@ static struct pnp_fixup pnp_fixups[] = { #ifdef CONFIG_AMD_NB {"PNP0c01", quirk_amd_mmconfig_area}, #endif -#ifdef CONFIG_X86 +#ifdef CONFIG_PCI {"PNP0c02", quirk_intel_mch}, #endif {""} diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9f0ea6cb692..e3bf885f4a6 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -541,18 +541,27 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) static void chsc_process_event_information(struct chsc_sei *sei, u64 ntsm) { - do { + static int ntsm_unsupported; + + while (true) { memset(sei, 0, sizeof(*sei)); sei->request.length = 0x0010; sei->request.code = 0x000e; - sei->ntsm = ntsm; + if (!ntsm_unsupported) + sei->ntsm = ntsm; if (chsc(sei)) break; if (sei->response.code != 0x0001) { - CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x)\n", - sei->response.code); + CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x, ntsm=%llx)\n", + sei->response.code, sei->ntsm); + + if (sei->response.code == 3 && sei->ntsm) { + /* Fallback for old firmware. */ + ntsm_unsupported = 1; + continue; + } break; } @@ -568,7 +577,10 @@ static void chsc_process_event_information(struct chsc_sei *sei, u64 ntsm) CIO_CRW_EVENT(2, "chsc: unhandled nt: %d\n", sei->nt); break; } - } while (sei->u.nt0_area.flags & 0x80); + + if (!(sei->u.nt0_area.flags & 0x80)) + break; + } } /* diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 7f0af4fcc00..6fd7d40b2c4 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -8293,7 +8293,6 @@ _scsih_suspend(struct pci_dev *pdev, pm_message_t state) mpt2sas_base_free_resources(ioc); pci_save_state(pdev); - pci_disable_device(pdev); pci_set_power_state(pdev, device_state); return 0; } diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 16bfd50cd3f..db3b494e592 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -750,8 +750,12 @@ static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) vscsi->affinity_hint_set = true; } else { - for (i = 0; i < vscsi->num_queues; i++) + for (i = 0; i < vscsi->num_queues; i++) { + if (!vscsi->req_vqs[i].vq) + continue; + virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); + } vscsi->affinity_hint_set = false; } @@ -112,6 +112,11 @@ struct kioctx { struct work_struct free_work; + /* + * signals when all in-flight requests are done + */ + struct completion *requests_done; + struct { /* * This counts the number of available slots in the ringbuffer, @@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + /* At this point we know that there are no any in-flight requests */ + if (ctx->requests_done) + complete(ctx->requests_done); + INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); } @@ -718,7 +727,8 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, + struct completion *requests_done) { if (!atomic_xchg(&ctx->dead, 1)) { struct kioctx_table *table; @@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); + ctx->requests_done = requests_done; percpu_ref_kill(&ctx->users); + } else { + if (requests_done) + complete(requests_done); } } @@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx); + kill_ioctx(mm, ctx, NULL); } } @@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(current->mm, ioctx); + kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users); } @@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(current->mm, ioctx); + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); + + /* Pass requests_done to kill_ioctx() where it can be set + * in a thread-safe way. If we try to set it here then we have + * a race condition if two io_destroy() called simultaneously. + */ + kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); + + /* Wait until all IO for the context are done. Otherwise kernel + * keep using user-space buffers even if user thinks the context + * is destroyed. + */ + wait_for_completion(&requests_done); + return 0; } pr_debug("EINVAL: io_destroy: invalid context id\n"); @@ -1299,10 +1327,8 @@ rw_common: &iovec, compat) : aio_setup_single_vector(req, rw, buf, &nr_segs, iovec); - if (ret) - return ret; - - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + if (!ret) + ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); if (ret < 0) { if (iovec != &inline_vec) kfree(iovec); diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 5a64ca4621f..f23174fb9ec 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -93,5 +93,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #define set_fixmap_io(idx, phys) \ __set_fixmap(idx, phys, FIXMAP_PAGE_IO) +#define set_fixmap_offset_io(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_IO) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_FIXMAP_H */ diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index d96deb443f1..94f9ea8abca 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -50,7 +50,7 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct } #ifndef zero_bytemask -#define zero_bytemask(mask) (~0ul << __fls(mask) << 1) +#define zero_bytemask(mask) (~1ul << __fls(mask)) #endif #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8834a7e5b94..97ac926c78a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -210,7 +210,7 @@ extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity - * @mask: cpumask + * @cpumask: cpumask * * Fails if cpumask does not contain an online CPU */ @@ -223,7 +223,7 @@ irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) /** * irq_force_affinity - Force the irq affinity of a given irq * @irq: Interrupt to set affinity - * @mask: cpumask + * @cpumask: cpumask * * Same as irq_set_affinity, but without checking the mask against * online cpus. diff --git a/include/linux/irq.h b/include/linux/irq.h index 10a0b1ac4ea..5c57efb863d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -603,6 +603,8 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } +unsigned int arch_dynirq_lower_bound(unsigned int from); + int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner); diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 11fd51b413d..ed0b2c599a6 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -25,7 +25,7 @@ struct module; { (1UL << TAINT_OOT_MODULE), "O" }, \ { (1UL << TAINT_FORCED_MODULE), "F" }, \ { (1UL << TAINT_CRAP), "C" }, \ - { (1UL << TAINT_UNSIGNED_MODULE), "X" }) + { (1UL << TAINT_UNSIGNED_MODULE), "E" }) TRACE_EVENT(module_load, diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092ceee2..6b715c0af1b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -234,6 +234,11 @@ again: goto again; } timer->base = new_base; + } else { + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + goto again; + } } return new_base; } @@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next.tv64 = expires_next.tv64; + /* + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following + * scenario: + * T1 expires 50ms from now + * T2 expires 5s from now + * + * T1 is removed, so this code is called and would reprogram + * the hardware to 5s from now. Any hrtimer_start after that + * will not reprogram the hardware due to hang_detected being + * set. So we'd effectivly block all timers until the T2 event + * fires. + */ + if (cpu_base->hang_detected) + return; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617616..bb07f2928f4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, if (from > irq) return -EINVAL; from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); diff --git a/kernel/module.c b/kernel/module.c index 5f14fec9f82..079c4615607 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (!(flags & O_NONBLOCK)) - pr_warn("waiting module removal not supported: please upgrade\n"); - if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a5bea..33e4648ae0e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) { return 0; } + +unsigned int __weak arch_dynirq_lower_bound(unsigned int from) +{ + return from; +} diff --git a/kernel/timer.c b/kernel/timer.c index 87bd529879c..3bb01a323b2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) bit = find_last_bit(&mask, BITS_PER_LONG); - mask = (1 << bit) - 1; + mask = (1UL << bit) - 1; expires_limit = expires_limit & ~(mask); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537f07d..4747b476a03 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 7c434796235..a74fba6d774 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -12,8 +12,8 @@ char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug/", - "/debug/", + "/sys/kernel/debug", + "/debug", 0, }; diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index baec7d887da..b83184f2d48 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4344,6 +4344,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event format, len_arg, arg); trace_seq_terminate(&p); trace_seq_puts(s, p.buffer); + trace_seq_destroy(&p); arg = arg->next; break; default: diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 791c539374c..feab9428163 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -876,8 +876,8 @@ struct event_filter { struct event_filter *pevent_filter_alloc(struct pevent *pevent); /* for backward compatibility */ -#define FILTER_NONE PEVENT_ERRNO__FILTER_NOT_FOUND -#define FILTER_NOEXIST PEVENT_ERRNO__NO_FILTER +#define FILTER_NONE PEVENT_ERRNO__NO_FILTER +#define FILTER_NOEXIST PEVENT_ERRNO__FILTER_NOT_FOUND #define FILTER_MISS PEVENT_ERRNO__FILTER_MISS #define FILTER_MATCH PEVENT_ERRNO__FILTER_MATCH diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e96923310d5..895edd32930 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -589,7 +589,7 @@ $(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< $(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) -o $@ -shared $(ALL_LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index b602ad93ce6..83bc2385e6d 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -23,9 +23,10 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_X86_SP]; - map = map_groups__find(&thread->mg, MAP__FUNCTION, (u64) sp); + map = map_groups__find(&thread->mg, MAP__VARIABLE, (u64) sp); if (!map) { pr_debug("failed to get stack map\n"); + free(buf); return -1; } diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S index 99167bf644e..60875d5c556 100644 --- a/tools/perf/arch/x86/tests/regs_load.S +++ b/tools/perf/arch/x86/tests/regs_load.S @@ -1,4 +1,3 @@ - #include <linux/linkage.h> #define AX 0 @@ -90,3 +89,10 @@ ENTRY(perf_regs_load) ret ENDPROC(perf_regs_load) #endif + +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index ee21fa95ebc..802cf544202 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -34,6 +34,14 @@ ifeq ($(ARCH),arm) LIBUNWIND_LIBS = -lunwind -lunwind-arm endif +# So far there's only x86 libdw unwind support merged in perf. +# Disable it on all other architectures in case libdw unwind +# support is detected in system. Add supported architectures +# to the check. +ifneq ($(ARCH),x86) + NO_LIBDW_DWARF_UNWIND := 1 +endif + ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 else @@ -109,6 +117,10 @@ CFLAGS += -Wall CFLAGS += -Wextra CFLAGS += -std=gnu99 +# Enforce a non-executable stack, as we may regress (again) in the future by +# adding assembler files missing the .GNU-stack linker note. +LDFLAGS += -Wl,-z,noexecstack + EXTLIBS = -lelf -lpthread -lrt -lm -ldl ifneq ($(OUTPUT),) @@ -186,7 +198,10 @@ VF_FEATURE_TESTS = \ stackprotector-all \ timerfd \ libunwind-debug-frame \ - bionic + bionic \ + liberty \ + liberty-z \ + cplus-demangle # Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not @@ -504,7 +519,21 @@ else endif ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd -lz -liberty + EXTLIBS += -lbfd + + # call all detections now so we get correct + # status in VF output + $(call feature_check,liberty) + $(call feature_check,liberty-z) + $(call feature_check,cplus-demangle) + + ifeq ($(feature-liberty), 1) + EXTLIBS += -liberty + else + ifeq ($(feature-liberty-z), 1) + EXTLIBS += -liberty -lz + endif + endif endif ifdef NO_DEMANGLE @@ -515,15 +544,10 @@ else CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else ifneq ($(feature-libbfd), 1) - $(call feature_check,liberty) - ifeq ($(feature-liberty), 1) - EXTLIBS += -lbfd -liberty - else - $(call feature_check,liberty-z) - ifeq ($(feature-liberty-z), 1) - EXTLIBS += -lbfd -liberty -lz - else - $(call feature_check,cplus-demangle) + ifneq ($(feature-liberty), 1) + ifneq ($(feature-liberty-z), 1) + # we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT + # or any of 'bfd iberty z' trinity ifeq ($(feature-cplus-demangle), 1) EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 5daeae1cb4c..2f92d6e7ee0 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -46,6 +46,7 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf +make_static := LDFLAGS=-static # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 @@ -87,6 +88,7 @@ run += make_install_bin # run += make_install_info # run += make_install_pdf run += make_minimal +run += make_static ifneq ($(call has,ctags),) run += make_tags diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a53cd0b8c15..27c2a5efe45 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -717,7 +717,7 @@ static char *get_kernel_version(const char *root_dir) } static int map_groups__set_modules_path_dir(struct map_groups *mg, - const char *dir_name) + const char *dir_name, int depth) { struct dirent *dent; DIR *dir = opendir(dir_name); @@ -742,7 +742,15 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, !strcmp(dent->d_name, "..")) continue; - ret = map_groups__set_modules_path_dir(mg, path); + /* Do not follow top-level source and build symlinks */ + if (depth == 0) { + if (!strcmp(dent->d_name, "source") || + !strcmp(dent->d_name, "build")) + continue; + } + + ret = map_groups__set_modules_path_dir(mg, path, + depth + 1); if (ret < 0) goto out; } else { @@ -786,11 +794,11 @@ static int machine__set_modules_path(struct machine *machine) if (!version) return -1; - snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel", + snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s", machine->root_dir, version); free(version); - return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); + return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); } static int machine__create_module(void *arg, const char *name, u64 start) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 47b29834a6b..56ff9bebb57 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, u32 val; u32 *reg; - offset >>= 1; reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset); + vcpu->vcpu_id, offset >> 1); - if (offset & 2) + if (offset & 4) val = *reg >> 16; else val = *reg & 0xffff; @@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 4) { + if (offset < 8) { *reg = ~0U; /* Force PPIs/SGIs to 1 */ return false; } val = vgic_cfg_compress(val); - if (offset & 2) { + if (offset & 4) { *reg &= 0xffff; *reg |= val << 16; } else { @@ -916,6 +915,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; @@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, if (addr + size < addr) return -EINVAL; + *ioaddr = addr; ret = vgic_ioaddr_overlap(kvm); if (ret) - return ret; - *ioaddr = addr; + *ioaddr = VGIC_ADDR_UNDEF; + return ret; } diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 8db43701016..bf06577fea5 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -395,7 +395,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, if (dev->entries_nr == 0) return r; - r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + r = pci_enable_msix_exact(dev->dev, + dev->host_msix_entries, dev->entries_nr); if (r) return r; diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 10df100c451..06e6401d6ef 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) flush_work(&work->work); #else if (cancel_work_sync(&work->work)) { - mmdrop(work->mm); + mmput(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } @@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmdrop(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } |