diff options
91 files changed, 803 insertions, 358 deletions
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index e6f80fcf013..a4acddad0c7 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -259,7 +259,7 @@ start_ap: * Switch into virtual mode: */ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI|IA64_PSR_AC) + |IA64_PSR_DI) ;; mov cr.ipsr=r16 movl r17=1f diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 689ffcaa284..18e794a5724 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -58,7 +58,7 @@ #include <asm/unistd.h> #include <asm/errno.h> -#if 1 +#if 0 # define PSR_DEFAULT_BITS psr.ac #else # define PSR_DEFAULT_BITS 0 diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S index 24018484c6e..397e34a63e1 100644 --- a/arch/ia64/kvm/vmm_ivt.S +++ b/arch/ia64/kvm/vmm_ivt.S @@ -64,7 +64,7 @@ #include "kvm_minstate.h" #include "vti.h" -#if 1 +#if 0 # define PSR_DEFAULT_BITS psr.ac #else # define PSR_DEFAULT_BITS 0 diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index d091aa1aaf1..bf9c823d402 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -31,4 +31,23 @@ #define SIGP_STATUS_INCORRECT_STATE 0x00000200UL #define SIGP_STATUS_NOT_RUNNING 0x00000400UL +#ifndef __ASSEMBLY__ + +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ + register unsigned int reg1 asm ("1") = parm; + int cc; + + asm volatile( + " sigp %1,%2,0(%3)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; +} + +#endif /* __ASSEMBLY__ */ + #endif /* __S390_ASM_SIGP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 16077939409..21703f85b48 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -7,6 +7,8 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#include <asm/sigp.h> + #ifdef CONFIG_SMP #include <asm/lowcore.h> @@ -50,9 +52,18 @@ static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_yield(void) { } -static inline void smp_stop_cpu(void) { } static inline void smp_fill_possible_mask(void) { } +static inline void smp_stop_cpu(void) +{ + u16 pcpu = stap(); + + for (;;) { + __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL); + cpu_relax(); + } +} + #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 5eb5c9ddb12..3802d2d3a18 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -282,7 +282,8 @@ #define __NR_finit_module 344 #define __NR_sched_setattr 345 #define __NR_sched_getattr 346 -#define NR_syscalls 345 +#define __NR_renameat2 347 +#define NR_syscalls 348 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 824c39dfddf..45cdb37aa6f 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -1,5 +1,5 @@ /* - * Compat sytem call wrappers. + * Compat system call wrappers. * * Copyright IBM Corp. 2014 */ @@ -213,3 +213,4 @@ COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, i COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index e6af9406987..acb412442e5 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -144,10 +144,10 @@ void show_registers(struct pt_regs *regs) char *mode; mode = user_mode(regs) ? "User" : "Krnl"; - printk("%s PSW : %p %p (%pSR)\n", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr, - (void *) regs->psw.addr); + printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + if (!user_mode(regs)) + printk(" (%pSR)", (void *)regs->psw.addr); + printk("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 4ac8fafec95..1c82619eb4f 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -64,7 +64,7 @@ void update_cr_regs(struct task_struct *task) if (task->thread.per_flags & PER_FLAG_NO_TE) cr_new &= ~(1UL << 55); if (cr_new != cr) - __ctl_load(cr, 0, 0); + __ctl_load(cr_new, 0, 0); /* Set or clear transaction execution TDC bits 62 and 63. */ __ctl_store(cr, 2, 2); cr_new = cr & ~3UL; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f70f2489fa5..88d1ca81e2d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1027,3 +1027,35 @@ void __init setup_arch(char **cmdline_p) /* Setup zfcpdump support */ setup_zfcpdump(); } + +#ifdef CONFIG_32BIT +static int no_removal_warning __initdata; + +static int __init parse_no_removal_warning(char *str) +{ + no_removal_warning = 1; + return 0; +} +__setup("no_removal_warning", parse_no_removal_warning); + +static int __init removal_warning(void) +{ + if (no_removal_warning) + return 0; + printk(KERN_ALERT "\n\n"); + printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n"); + printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n"); + printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n"); + printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n"); + printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n"); + printk(KERN_CONT "please let us know. Please write to:\n"); + printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n"); + printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n"); + printk(KERN_CONT "Thank you!\n\n"); + printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n"); + printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n"); + schedule_timeout_uninterruptible(300 * HZ); + return 0; +} +early_initcall(removal_warning); +#endif diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 512ce1cde2a..86e65ec3422 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -82,21 +82,6 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) -{ - register unsigned int reg1 asm ("1") = parm; - int cc; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); - if (status && cc == 1) - *status = reg1; - return cc; -} - static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) { int cc; diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 542ef488bac..fe5cdf29a00 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -355,3 +355,4 @@ SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) +SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 23f866b4c7f..7416efe8eae 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -338,9 +338,6 @@ static inline unsigned long strnlen_user_srst(const char __user *src, register unsigned long reg0 asm("0") = 0; unsigned long tmp1, tmp2; - if (unlikely(!size)) - return 0; - update_primary_asce(current); asm volatile( " la %2,0(%1)\n" " la %3,0(%0,%1)\n" @@ -359,6 +356,8 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { + if (unlikely(!size)) + return 0; update_primary_asce(current); return strnlen_user_srst(src, size); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 19f623f1f21..2f51a998a67 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -126,6 +126,133 @@ static inline int user_space_fault(struct pt_regs *regs) return 0; } +static int bad_address(void *p) +{ + unsigned long dummy; + + return probe_kernel_address((unsigned long *)p, dummy); +} + +#ifdef CONFIG_64BIT +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%016lx ", asce); + switch (asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table = table + ((address >> 53) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R1:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION2: + table = table + ((address >> 42) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R2:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION3: + table = table + ((address >> 31) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R3:%016lx ", *table); + if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_SEGMENT: + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont(KERN_CONT "S:%016lx ", *table); + if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + } + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%016lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); +} + +#else /* CONFIG_64BIT */ + +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%08lx ", asce); + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("S:%08lx ", *table); + if (*table & _SEGMENT_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%08lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); +} + +#endif /* CONFIG_64BIT */ + +static void dump_fault_info(struct pt_regs *regs) +{ + unsigned long asce; + + pr_alert("Fault in "); + switch (regs->int_parm_long & 3) { + case 3: + pr_cont("home space "); + break; + case 2: + pr_cont("secondary space "); + break; + case 1: + pr_cont("access register "); + break; + case 0: + pr_cont("primary space "); + break; + } + pr_cont("mode while using "); + if (!user_space_fault(regs)) { + asce = S390_lowcore.kernel_asce; + pr_cont("kernel "); + } +#ifdef CONFIG_PGSTE + else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { + struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + asce = gmap->asce; + pr_cont("gmap "); + } +#endif + else { + asce = S390_lowcore.user_asce; + pr_cont("user "); + } + pr_cont("ASCE.\n"); + dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); +} + static inline void report_user_fault(struct pt_regs *regs, long signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) @@ -138,8 +265,9 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) regs->int_code); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); printk(KERN_CONT "\n"); - printk(KERN_ALERT "failing address: %lX\n", - regs->int_parm_long & __FAIL_ADDR_MASK); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); show_regs(regs); } @@ -177,11 +305,13 @@ static noinline void do_no_context(struct pt_regs *regs) address = regs->int_parm_long & __FAIL_ADDR_MASK; if (!user_space_fault(regs)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" - " at virtual kernel address %p\n", (void *)address); + " in virtual kernel address space\n"); else printk(KERN_ALERT "Unable to handle kernel paging request" - " at virtual user address %p\n", (void *)address); - + " in virtual user address space\n"); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); die(regs, "Oops"); do_exit(SIGKILL); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fcaf9c96126..7de069afb38 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -60,7 +60,7 @@ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bea60671ef8..f47a104a749 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word9_x86_features = F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | - F(ADX); + F(ADX) | F(SMAP); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a2a1bb7ed8c..eeecbed26ac 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_SMEP)); } +static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_SMAP)); +} + static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f5704d9e5dd..813d31038b9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, } } -static void update_permission_bitmask(struct kvm_vcpu *vcpu, +void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool ept) { unsigned bit, byte, pfec; u8 map; - bool fault, x, w, u, wf, uf, ff, smep; + bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0; - smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { pfec = byte << 1; map = 0; wf = pfec & PFERR_WRITE_MASK; uf = pfec & PFERR_USER_MASK; ff = pfec & PFERR_FETCH_MASK; + /* + * PFERR_RSVD_MASK bit is set in PFEC if the access is not + * subject to SMAP restrictions, and cleared otherwise. The + * bit is only meaningful if the SMAP bit is set in CR4. + */ + smapf = !(pfec & PFERR_RSVD_MASK); for (bit = 0; bit < 8; ++bit) { x = bit & ACC_EXEC_MASK; w = bit & ACC_WRITE_MASK; @@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, /* Allow supervisor writes if !cr0.wp */ w |= !is_write_protection(vcpu) && !uf; /* Disallow supervisor fetches of user code if cr4.smep */ - x &= !(smep && u && !uf); + x &= !(cr4_smep && u && !uf); + + /* + * SMAP:kernel-mode data accesses from user-mode + * mappings should fault. A fault is considered + * as a SMAP violation if all of the following + * conditions are ture: + * - X86_CR4_SMAP is set in CR4 + * - An user page is accessed + * - Page fault in kernel mode + * - if CPL = 3 or X86_EFLAGS_AC is clear + * + * Here, we cover the first three conditions. + * The fourth is computed dynamically in + * permission_fault() and is in smapf. + * + * Also, SMAP does not affect instruction + * fetches, add the !ff check here to make it + * clearer. + */ + smap = cr4_smap && u && !uf && !ff; } else /* Not really needed: no U/S accesses on ept */ u = 1; - fault = (ff && !x) || (uf && !u) || (wf && !w); + fault = (ff && !x) || (uf && !u) || (wf && !w) || + (smapf && smap); map |= fault << bit; } mmu->permissions[byte] = map; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 29261527435..3842e70bdb7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -44,11 +44,17 @@ #define PT_DIRECTORY_LEVEL 2 #define PT_PAGE_TABLE_LEVEL 1 -#define PFERR_PRESENT_MASK (1U << 0) -#define PFERR_WRITE_MASK (1U << 1) -#define PFERR_USER_MASK (1U << 2) -#define PFERR_RSVD_MASK (1U << 3) -#define PFERR_FETCH_MASK (1U << 4) +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 + +#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) +#define PFERR_USER_MASK (1U << PFERR_USER_BIT) +#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); @@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly); +void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + bool ept); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { @@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu) * Will a fault with a given page-fault error code (pfec) cause a permission * fault with the given access (in ACC_* format)? */ -static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, - unsigned pfec) +static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + unsigned pte_access, unsigned pfec) { - return (mmu->permissions[pfec >> 1] >> pte_access) & 1; + int cpl = kvm_x86_ops->get_cpl(vcpu); + unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); + + /* + * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1. + * + * If CPL = 3, SMAP applies to all supervisor-mode data accesses + * (these are implicit supervisor accesses) regardless of the value + * of EFLAGS.AC. + * + * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving + * the result in X86_EFLAGS_AC. We then insert it in place of + * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec, + * but it will be one in index if SMAP checks are being overridden. + * It is important to keep this branchless. + */ + unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); + int index = (pfec >> 1) + + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + + return (mmu->permissions[index] >> pte_access) & 1; } void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b1e6c1bf68d..123efd3ec29 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -353,7 +353,7 @@ retry_walk: walker->ptes[walker->level - 1] = pte; } while (!is_last_gpte(mmu, walker->level, pte)); - if (unlikely(permission_fault(mmu, pte_access, access))) { + if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) { errcode |= PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1320e0f8e61..1f68c583192 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) hw_cr4 &= ~X86_CR4_PAE; hw_cr4 |= X86_CR4_PSE; /* - * SMEP is disabled if CPU is in non-paging mode in - * hardware. However KVM always uses paging mode to + * SMEP/SMAP is disabled if CPU is in non-paging mode + * in hardware. However KVM always uses paging mode to * emulate guest non-paging mode with TDP. - * To emulate this behavior, SMEP needs to be manually - * disabled when guest switches to non-paging mode. + * To emulate this behavior, SMEP/SMAP needs to be + * manually disabled when guest switches to non-paging + * mode. */ - hw_cr4 &= ~X86_CR4_SMEP; + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); } else if (!(cr4 & X86_CR4_PAE)) { hw_cr4 &= ~X86_CR4_PAE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d1b5cd4d34..8b8fc0b792b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) return 1; + if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP)) + return 1; + if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) return 1; @@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); + if ((cr4 ^ old_cr4) & X86_CR4_SMAP) + update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false); + if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) kvm_update_cpuid(vcpu); @@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void) { struct timespec ts; - WARN_ON(preemptible()); ktime_get_ts(&ts); monotonic_to_bootbased(&ts); return timespec_to_ns(&ts); @@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | (write ? PFERR_WRITE_MASK : 0); if (vcpu_match_mmio_gva(vcpu, gva) - && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) { + && !permission_fault(vcpu, vcpu->arch.walk_mmu, + vcpu->arch.access, access)) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index 8c3b255e629..e900961cdd2 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -61,18 +61,18 @@ static int bcm2835_rng_probe(struct platform_device *pdev) } bcm2835_rng_ops.priv = (unsigned long)rng_base; + /* set warm-up count & enable */ + __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS); + __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL); + /* register driver */ err = hwrng_register(&bcm2835_rng_ops); if (err) { dev_err(dev, "hwrng registration failed\n"); iounmap(rng_base); - } else { + } else dev_info(dev, "hwrng registered\n"); - /* set warm-up count & enable */ - __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS); - __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL); - } return err; } diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index a8efb18e42f..0ab83708b6a 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8627,6 +8627,7 @@ bnx2_remove_one(struct pci_dev *pdev) pci_disable_device(pdev); } +#ifdef CONFIG_PM_SLEEP static int bnx2_suspend(struct device *device) { @@ -8665,7 +8666,6 @@ bnx2_resume(struct device *device) return 0; } -#ifdef CONFIG_PM_SLEEP static SIMPLE_DEV_PM_OPS(bnx2_pm_ops, bnx2_suspend, bnx2_resume); #define BNX2_PM_OPS (&bnx2_pm_ops) diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 751d5c7b312..7e49c43b7af 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -4,7 +4,7 @@ config NET_CADENCE bool "Cadence devices" - depends on HAS_IOMEM + depends on HAS_IOMEM && (ARM || AVR32 || COMPILE_TEST) default y ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -22,7 +22,7 @@ if NET_CADENCE config ARM_AT91_ETHER tristate "AT91RM9200 Ethernet support" - depends on HAS_DMA + depends on HAS_DMA && (ARCH_AT91RM9200 || COMPILE_TEST) select MACB ---help--- If you wish to compile a kernel for the AT91RM9200 and enable @@ -30,7 +30,7 @@ config ARM_AT91_ETHER config MACB tristate "Cadence MACB/GEM support" - depends on HAS_DMA + depends on HAS_DMA && (PLATFORM_AT32AP || ARCH_AT91 || ARCH_PICOXCELL || ARCH_ZYNQ || COMPILE_TEST) select PHYLIB ---help--- The Cadence MACB ethernet interface is found on many Atmel AT32 and diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 81e8402a74b..8a96572fdde 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -154,7 +154,7 @@ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync) req->params = htons(L2T_W_PORT(e->lport) | L2T_W_NOREPLY(!sync)); req->l2t_idx = htons(e->idx); req->vlan = htons(e->vlan); - if (e->neigh) + if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK)) memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac)); memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); @@ -394,6 +394,8 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, if (e) { spin_lock(&e->lock); /* avoid race with t4_l2t_free */ e->state = L2T_STATE_RESOLVING; + if (neigh->dev->flags & IFF_LOOPBACK) + memcpy(e->dmac, physdev->dev_addr, sizeof(e->dmac)); memcpy(e->addr, addr, addr_len); e->ifindex = ifidx; e->hash = hash; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index fb2fe65903c..bba67681aea 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -682,7 +682,7 @@ enum { SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - FW_MAX_SIZE = 512 * 1024, + FW_MAX_SIZE = 16 * SF_SEC_SIZE, }; /** diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 8ccaa2520dc..97db5a7179d 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -374,6 +374,7 @@ enum vf_state { #define BE_FLAGS_NAPI_ENABLED (1 << 9) #define BE_FLAGS_QNQ_ASYNC_EVT_RCVD (1 << 11) #define BE_FLAGS_VXLAN_OFFLOADS (1 << 12) +#define BE_FLAGS_SETUP_DONE (1 << 13) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3e6df47b697..a18645407d2 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2033,11 +2033,13 @@ static void be_tx_compl_clean(struct be_adapter *adapter) bool dummy_wrb; int i, pending_txqs; - /* Wait for a max of 200ms for all the tx-completions to arrive. */ + /* Stop polling for compls when HW has been silent for 10ms */ do { pending_txqs = adapter->num_tx_qs; for_all_tx_queues(adapter, txo, i) { + cmpl = 0; + num_wrbs = 0; txq = &txo->q; while ((txcp = be_tx_compl_get(&txo->cq))) { end_idx = @@ -2050,14 +2052,13 @@ static void be_tx_compl_clean(struct be_adapter *adapter) if (cmpl) { be_cq_notify(adapter, txo->cq.id, false, cmpl); atomic_sub(num_wrbs, &txq->used); - cmpl = 0; - num_wrbs = 0; + timeo = 0; } if (atomic_read(&txq->used) == 0) pending_txqs--; } - if (pending_txqs == 0 || ++timeo > 200) + if (pending_txqs == 0 || ++timeo > 10 || be_hw_error(adapter)) break; mdelay(1); @@ -2725,6 +2726,12 @@ static int be_close(struct net_device *netdev) struct be_eq_obj *eqo; int i; + /* This protection is needed as be_close() may be called even when the + * adapter is in cleared state (after eeh perm failure) + */ + if (!(adapter->flags & BE_FLAGS_SETUP_DONE)) + return 0; + be_roce_dev_close(adapter); if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { @@ -3055,6 +3062,7 @@ static int be_clear(struct be_adapter *adapter) be_clear_queues(adapter); be_msix_disable(adapter); + adapter->flags &= ~BE_FLAGS_SETUP_DONE; return 0; } @@ -3559,6 +3567,7 @@ static int be_setup(struct be_adapter *adapter) adapter->phy.fc_autoneg = 1; be_schedule_worker(adapter); + adapter->flags |= BE_FLAGS_SETUP_DONE; return 0; err: be_clear(adapter); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d04b1c3c9b8..b248bcbdae6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -89,9 +89,8 @@ #define MVNETA_TX_IN_PRGRS BIT(1) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c -#define MVNETA_SERDES_CFG 0x24A0 +#define MVNETA_SGMII_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 -#define MVNETA_RGMII_SERDES_PROTO 0x0667 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 @@ -712,6 +711,35 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp, mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } + + +/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */ +static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable) +{ + u32 val; + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + + if (enable) + val |= MVNETA_GMAC2_PORT_RGMII; + else + val &= ~MVNETA_GMAC2_PORT_RGMII; + + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); +} + +/* Config SGMII port */ +static void mvneta_port_sgmii_config(struct mvneta_port *pp) +{ + u32 val; + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val |= MVNETA_GMAC2_PCS_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + + mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); +} + /* Start the Ethernet port RX and TX activity */ static void mvneta_port_up(struct mvneta_port *pp) { @@ -2729,15 +2757,12 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); if (phy_mode == PHY_INTERFACE_MODE_SGMII) - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); - else - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO); + mvneta_port_sgmii_config(pp); - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - - val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII; + mvneta_gmac_rgmii_set(pp, 1); /* Cancel Port Reset */ + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); val &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index f0ae95f66ce..cef267e24f9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2301,13 +2301,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) /* Allow large DMA segments, up to the firmware limit of 1 GB */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - err = -ENOMEM; - goto err_release_regions; - } - - dev = &priv->dev; + dev = pci_get_drvdata(pdev); + priv = mlx4_priv(dev); dev->pdev = pdev; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); @@ -2374,10 +2369,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) } else { atomic_inc(&pf_loading); err = pci_enable_sriov(pdev, total_vfs); - atomic_dec(&pf_loading); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); + atomic_dec(&pf_loading); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); @@ -2535,8 +2530,10 @@ slave_start: mlx4_sense_init(dev); mlx4_start_sense(dev); - priv->pci_dev_data = pci_dev_data; - pci_set_drvdata(pdev, dev); + priv->removed = 0; + + if (mlx4_is_master(dev) && dev->num_vfs) + atomic_dec(&pf_loading); return 0; @@ -2588,6 +2585,9 @@ err_rel_own: if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + if (mlx4_is_master(dev) && dev->num_vfs) + atomic_dec(&pf_loading); + kfree(priv->dev.dev_vfs); err_free_dev: @@ -2604,85 +2604,110 @@ err_disable_pdev: static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { + struct mlx4_priv *priv; + struct mlx4_dev *dev; + printk_once(KERN_INFO "%s", mlx4_version); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + dev = &priv->dev; + pci_set_drvdata(pdev, dev); + priv->pci_dev_data = id->driver_data; + return __mlx4_init_one(pdev, id->driver_data); } -static void mlx4_remove_one(struct pci_dev *pdev) +static void __mlx4_remove_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); + int pci_dev_data; int p; - if (dev) { - /* in SRIOV it is not allowed to unload the pf's - * driver while there are alive vf's */ - if (mlx4_is_master(dev)) { - if (mlx4_how_many_lives_vf(dev)) - printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n"); - } - mlx4_stop_sense(dev); - mlx4_unregister_device(dev); + if (priv->removed) + return; - for (p = 1; p <= dev->caps.num_ports; p++) { - mlx4_cleanup_port_info(&priv->port[p]); - mlx4_CLOSE_PORT(dev, p); - } + pci_dev_data = priv->pci_dev_data; - if (mlx4_is_master(dev)) - mlx4_free_resource_tracker(dev, - RES_TR_FREE_SLAVES_ONLY); - - mlx4_cleanup_counters_table(dev); - mlx4_cleanup_qp_table(dev); - mlx4_cleanup_srq_table(dev); - mlx4_cleanup_cq_table(dev); - mlx4_cmd_use_polling(dev); - mlx4_cleanup_eq_table(dev); - mlx4_cleanup_mcg_table(dev); - mlx4_cleanup_mr_table(dev); - mlx4_cleanup_xrcd_table(dev); - mlx4_cleanup_pd_table(dev); + /* in SRIOV it is not allowed to unload the pf's + * driver while there are alive vf's */ + if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev)) + printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n"); + mlx4_stop_sense(dev); + mlx4_unregister_device(dev); - if (mlx4_is_master(dev)) - mlx4_free_resource_tracker(dev, - RES_TR_FREE_STRUCTS_ONLY); - - iounmap(priv->kar); - mlx4_uar_free(dev, &priv->driver_uar); - mlx4_cleanup_uar_table(dev); - if (!mlx4_is_slave(dev)) - mlx4_clear_steering(dev); - mlx4_free_eq_table(dev); - if (mlx4_is_master(dev)) - mlx4_multi_func_cleanup(dev); - mlx4_close_hca(dev); - if (mlx4_is_slave(dev)) - mlx4_multi_func_cleanup(dev); - mlx4_cmd_cleanup(dev); - - if (dev->flags & MLX4_FLAG_MSI_X) - pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - } + for (p = 1; p <= dev->caps.num_ports; p++) { + mlx4_cleanup_port_info(&priv->port[p]); + mlx4_CLOSE_PORT(dev, p); + } - if (!mlx4_is_slave(dev)) - mlx4_free_ownership(dev); + if (mlx4_is_master(dev)) + mlx4_free_resource_tracker(dev, + RES_TR_FREE_SLAVES_ONLY); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - kfree(dev->dev_vfs); + mlx4_cleanup_counters_table(dev); + mlx4_cleanup_qp_table(dev); + mlx4_cleanup_srq_table(dev); + mlx4_cleanup_cq_table(dev); + mlx4_cmd_use_polling(dev); + mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mcg_table(dev); + mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); + mlx4_cleanup_pd_table(dev); - kfree(priv); - pci_release_regions(pdev); - pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); + if (mlx4_is_master(dev)) + mlx4_free_resource_tracker(dev, + RES_TR_FREE_STRUCTS_ONLY); + + iounmap(priv->kar); + mlx4_uar_free(dev, &priv->driver_uar); + mlx4_cleanup_uar_table(dev); + if (!mlx4_is_slave(dev)) + mlx4_clear_steering(dev); + mlx4_free_eq_table(dev); + if (mlx4_is_master(dev)) + mlx4_multi_func_cleanup(dev); + mlx4_close_hca(dev); + if (mlx4_is_slave(dev)) + mlx4_multi_func_cleanup(dev); + mlx4_cmd_cleanup(dev); + + if (dev->flags & MLX4_FLAG_MSI_X) + pci_disable_msix(pdev); + if (dev->flags & MLX4_FLAG_SRIOV) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + dev->num_vfs = 0; } + + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); + + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + kfree(dev->dev_vfs); + + pci_release_regions(pdev); + pci_disable_device(pdev); + memset(priv, 0, sizeof(*priv)); + priv->pci_dev_data = pci_dev_data; + priv->removed = 1; +} + +static void mlx4_remove_one(struct pci_dev *pdev) +{ + struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_priv *priv = mlx4_priv(dev); + + __mlx4_remove_one(pdev); + kfree(priv); + pci_set_drvdata(pdev, NULL); } int mlx4_restart_one(struct pci_dev *pdev) @@ -2692,7 +2717,7 @@ int mlx4_restart_one(struct pci_dev *pdev) int pci_dev_data; pci_dev_data = priv->pci_dev_data; - mlx4_remove_one(pdev); + __mlx4_remove_one(pdev); return __mlx4_init_one(pdev, pci_dev_data); } @@ -2747,7 +2772,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - mlx4_remove_one(pdev); + __mlx4_remove_one(pdev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -2755,11 +2780,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - const struct pci_device_id *id; - int ret; + struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_priv *priv = mlx4_priv(dev); + int ret; - id = pci_match_id(mlx4_pci_table, pdev); - ret = __mlx4_init_one(pdev, id->driver_data); + ret = __mlx4_init_one(pdev, priv->pci_dev_data); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index cf8be41abb3..f9c46510196 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -800,6 +800,7 @@ struct mlx4_priv { spinlock_t ctx_lock; int pci_dev_data; + int removed; struct list_head pgdir_list; struct mutex pgdir_mutex; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index b48737dcd3c..ba20c721ee9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2139,8 +2139,6 @@ static int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) ahw->max_mac_filters = nic_info.max_mac_filters; ahw->max_mtu = nic_info.max_mtu; - adapter->max_tx_rings = ahw->max_tx_ques; - adapter->max_sds_rings = ahw->max_rx_ques; /* eSwitch capability indicates vNIC mode. * vNIC and SRIOV are mutually exclusive operational modes. * If SR-IOV capability is detected, SR-IOV physical function @@ -2161,6 +2159,7 @@ static int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) { struct qlcnic_hardware_context *ahw = adapter->ahw; + u16 max_sds_rings, max_tx_rings; int ret; ret = qlcnic_83xx_get_nic_configuration(adapter); @@ -2173,18 +2172,21 @@ int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) if (qlcnic_83xx_config_vnic_opmode(adapter)) return -EIO; - adapter->max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS; - adapter->max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS; + max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS; + max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS; } else if (ret == QLC_83XX_DEFAULT_OPMODE) { ahw->nic_mode = QLCNIC_DEFAULT_MODE; adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver; ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry; - adapter->max_sds_rings = QLCNIC_MAX_SDS_RINGS; - adapter->max_tx_rings = QLCNIC_MAX_TX_RINGS; + max_sds_rings = QLCNIC_MAX_SDS_RINGS; + max_tx_rings = QLCNIC_MAX_TX_RINGS; } else { return -EIO; } + adapter->max_sds_rings = min(ahw->max_rx_ques, max_sds_rings); + adapter->max_tx_rings = min(ahw->max_tx_ques, max_tx_rings); + return 0; } @@ -2348,15 +2350,16 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) goto disable_intr; } + INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work); + err = qlcnic_83xx_setup_mbx_intr(adapter); if (err) goto disable_mbx_intr; qlcnic_83xx_clear_function_resources(adapter); - - INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work); - + qlcnic_dcb_enable(adapter->dcb); qlcnic_83xx_initialize_nic(adapter, 1); + qlcnic_dcb_get_info(adapter->dcb); /* Configure default, SR-IOV or Virtual NIC mode of operation */ err = qlcnic_83xx_configure_opmode(adapter); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 64dcbf33d8f..c1e11f5715b 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -883,8 +883,6 @@ int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *adapter, npar_info->max_rx_ques = le16_to_cpu(nic_info->max_rx_ques); npar_info->capabilities = le32_to_cpu(nic_info->capabilities); npar_info->max_mtu = le16_to_cpu(nic_info->max_mtu); - adapter->max_tx_rings = npar_info->max_tx_ques; - adapter->max_sds_rings = npar_info->max_rx_ques; } qlcnic_free_mbx_args(&cmd); @@ -1356,6 +1354,7 @@ int qlcnic_config_switch_port(struct qlcnic_adapter *adapter, arg2 &= ~BIT_3; break; case QLCNIC_ADD_VLAN: + arg1 &= ~(0x0ffff << 16); arg1 |= (BIT_2 | BIT_5); arg1 |= (esw_cfg->vlan_id << 16); break; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c index 7d4f54912ba..a51fe18f09a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c @@ -330,8 +330,6 @@ static int __qlcnic_dcb_attach(struct qlcnic_dcb *dcb) goto out_free_cfg; } - qlcnic_dcb_get_info(dcb); - return 0; out_free_cfg: kfree(dcb->cfg); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 309d0564088..dbf75393f75 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -670,7 +670,7 @@ int qlcnic_setup_tss_rss_intr(struct qlcnic_adapter *adapter) else num_msix += adapter->drv_tx_rings; - if (adapter->drv_rss_rings > 0) + if (adapter->drv_rss_rings > 0) num_msix += adapter->drv_rss_rings; else num_msix += adapter->drv_sds_rings; @@ -686,19 +686,15 @@ int qlcnic_setup_tss_rss_intr(struct qlcnic_adapter *adapter) return -ENOMEM; } -restore: for (vector = 0; vector < num_msix; vector++) adapter->msix_entries[vector].entry = vector; +restore: err = pci_enable_msix(pdev, adapter->msix_entries, num_msix); - if (err == 0) { - adapter->ahw->num_msix = num_msix; - if (adapter->drv_tss_rings > 0) - adapter->drv_tx_rings = adapter->drv_tss_rings; + if (err > 0) { + if (!adapter->drv_tss_rings && !adapter->drv_rss_rings) + return -ENOSPC; - if (adapter->drv_rss_rings > 0) - adapter->drv_sds_rings = adapter->drv_rss_rings; - } else { netdev_info(adapter->netdev, "Unable to allocate %d MSI-X vectors, Available vectors %d\n", num_msix, err); @@ -716,12 +712,20 @@ restore: "Restoring %d Tx, %d SDS rings for total %d vectors.\n", adapter->drv_tx_rings, adapter->drv_sds_rings, num_msix); - goto restore; - err = -EIO; + goto restore; + } else if (err < 0) { + return err; } - return err; + adapter->ahw->num_msix = num_msix; + if (adapter->drv_tss_rings > 0) + adapter->drv_tx_rings = adapter->drv_tss_rings; + + if (adapter->drv_rss_rings > 0) + adapter->drv_sds_rings = adapter->drv_rss_rings; + + return 0; } int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) @@ -2528,8 +2532,6 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_hw; } - qlcnic_dcb_enable(adapter->dcb); - if (qlcnic_read_mac_addr(adapter)) dev_warn(&pdev->dev, "failed to read mac addr\n"); @@ -2549,7 +2551,10 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Device does not support MSI interrupts\n"); if (qlcnic_82xx_check(adapter)) { + qlcnic_dcb_enable(adapter->dcb); + qlcnic_dcb_get_info(adapter->dcb); err = qlcnic_setup_intr(adapter); + if (err) { dev_err(&pdev->dev, "Failed to setup interrupt\n"); goto err_out_disable_msi; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 14f748cbf0d..28013799154 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -461,6 +461,16 @@ static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; + if (pci_vfs_assigned(adapter->pdev)) { + netdev_err(adapter->netdev, + "SR-IOV VFs belonging to port %d are assigned to VMs. SR-IOV can not be disabled on this port\n", + adapter->portnum); + netdev_info(adapter->netdev, + "Please detach SR-IOV VFs belonging to port %d from VMs, and then try to disable SR-IOV on this port\n", + adapter->portnum); + return -EPERM; + } + rtnl_lock(); if (netif_running(netdev)) __qlcnic_down(adapter, netdev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 448d156c3d0..cd346e27f2e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -354,7 +354,7 @@ int qlcnic_is_valid_nic_func(struct qlcnic_adapter *adapter, u8 pci_func) { int i; - for (i = 0; i < adapter->ahw->max_vnic_func; i++) { + for (i = 0; i < adapter->ahw->total_nic_func; i++) { if (adapter->npars[i].pci_func == pci_func) return i; } @@ -720,6 +720,7 @@ static ssize_t qlcnic_sysfs_read_npar_config(struct file *file, struct qlcnic_adapter *adapter = dev_get_drvdata(dev); struct qlcnic_npar_func_cfg *np_cfg; struct qlcnic_info nic_info; + u8 pci_func; int i, ret; u32 count; @@ -729,26 +730,28 @@ static ssize_t qlcnic_sysfs_read_npar_config(struct file *file, count = size / sizeof(struct qlcnic_npar_func_cfg); for (i = 0; i < adapter->ahw->total_nic_func; i++) { - if (qlcnic_is_valid_nic_func(adapter, i) < 0) - continue; if (adapter->npars[i].pci_func >= count) { dev_dbg(dev, "%s: Total nic functions[%d], App sent function count[%d]\n", __func__, adapter->ahw->total_nic_func, count); continue; } - ret = qlcnic_get_nic_info(adapter, &nic_info, i); - if (ret) - return ret; if (!adapter->npars[i].eswitch_status) continue; - np_cfg[i].pci_func = i; - np_cfg[i].op_mode = (u8)nic_info.op_mode; - np_cfg[i].port_num = nic_info.phys_port; - np_cfg[i].fw_capab = nic_info.capabilities; - np_cfg[i].min_bw = nic_info.min_tx_bw; - np_cfg[i].max_bw = nic_info.max_tx_bw; - np_cfg[i].max_tx_queues = nic_info.max_tx_ques; - np_cfg[i].max_rx_queues = nic_info.max_rx_ques; + pci_func = adapter->npars[i].pci_func; + if (qlcnic_is_valid_nic_func(adapter, pci_func) < 0) + continue; + ret = qlcnic_get_nic_info(adapter, &nic_info, pci_func); + if (ret) + return ret; + + np_cfg[pci_func].pci_func = pci_func; + np_cfg[pci_func].op_mode = (u8)nic_info.op_mode; + np_cfg[pci_func].port_num = nic_info.phys_port; + np_cfg[pci_func].fw_capab = nic_info.capabilities; + np_cfg[pci_func].min_bw = nic_info.min_tx_bw; + np_cfg[pci_func].max_bw = nic_info.max_tx_bw; + np_cfg[pci_func].max_tx_queues = nic_info.max_tx_ques; + np_cfg[pci_func].max_rx_queues = nic_info.max_rx_ques; } return size; } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 430bb0db9bc..e36f194673a 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -365,7 +365,7 @@ __at86rf230_read_subreg(struct at86rf230_local *lp, dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]); if (status == 0) - *data = buf[1]; + *data = (buf[1] & mask) >> shift; return status; } @@ -1025,14 +1025,6 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) return -EINVAL; } - rc = at86rf230_read_subreg(lp, SR_AVDD_OK, &status); - if (rc) - return rc; - if (!status) { - dev_err(&lp->spi->dev, "AVDD error\n"); - return -EINVAL; - } - return 0; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c55e316373a..82355d5d155 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1755,8 +1755,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, if (err) return err; - return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, - false); + return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, + tos, ttl, df, false); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 84734a80509..83c39e2858b 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -1521,11 +1521,7 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring) cosa_putstatus(cosa, 0); cosa_getdata8(cosa); cosa_putstatus(cosa, SR_RST); -#ifdef MODULE msleep(500); -#else - udelay(5*100000); -#endif /* Disable all IRQs from the card */ cosa_putstatus(cosa, 0); diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 1990285296c..c316051d9bd 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -1252,7 +1252,7 @@ static __init int sclp_initcall(void) return rc; sclp_pdev = platform_device_register_simple("sclp", -1, NULL, 0); - rc = PTR_RET(sclp_pdev); + rc = PTR_ERR_OR_ZERO(sclp_pdev); if (rc) goto fail_platform_driver_unregister; diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 6e8f90f84e4..6e14999f9e8 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -515,7 +515,7 @@ static int __init sclp_detect_standby_memory(void) if (rc) goto out; sclp_pdev = platform_device_register_simple("sclp_mem", -1, NULL, 0); - rc = PTR_RET(sclp_pdev); + rc = PTR_ERR_OR_ZERO(sclp_pdev); if (rc) goto out_driver; sclp_add_standby_memory(); diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 4eed38cd0af..cd9c9190959 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -97,13 +97,16 @@ static void sclp_vt220_pm_event_fn(struct sclp_register *reg, static int __sclp_vt220_emit(struct sclp_vt220_request *request); static void sclp_vt220_emit_current(void); -/* Registration structure for our interest in SCLP event buffers */ +/* Registration structure for SCLP output event buffers */ static struct sclp_register sclp_vt220_register = { .send_mask = EVTYP_VT220MSG_MASK, + .pm_event_fn = sclp_vt220_pm_event_fn, +}; + +/* Registration structure for SCLP input event buffers */ +static struct sclp_register sclp_vt220_register_input = { .receive_mask = EVTYP_VT220MSG_MASK, - .state_change_fn = NULL, .receiver_fn = sclp_vt220_receiver_fn, - .pm_event_fn = sclp_vt220_pm_event_fn, }; @@ -715,9 +718,14 @@ static int __init sclp_vt220_tty_init(void) rc = tty_register_driver(driver); if (rc) goto out_init; + rc = sclp_register(&sclp_vt220_register_input); + if (rc) + goto out_reg; sclp_vt220_driver = driver; return 0; +out_reg: + tty_unregister_driver(driver); out_init: __sclp_vt220_cleanup(); out_driver: diff --git a/include/linux/filter.h b/include/linux/filter.h index 262dcbb75ff..024fd03e5d1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -220,7 +220,6 @@ enum { BPF_S_ANC_RXHASH, BPF_S_ANC_CPU, BPF_S_ANC_ALU_XOR_X, - BPF_S_ANC_SECCOMP_LD_W, BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index ec2ffaf418c..df78dc2b552 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,7 +87,6 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -void nf_ct_gre_keymap_flush(struct net *net); void nf_nat_need_gre(void); #endif /* __KERNEL__ */ diff --git a/include/net/dst.h b/include/net/dst.h index 46ed958e0c6..71c60f42be4 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -45,7 +45,7 @@ struct dst_entry { void *__pad1; #endif int (*input)(struct sk_buff *); - int (*output)(struct sk_buff *); + int (*output)(struct sock *sk, struct sk_buff *skb); unsigned short flags; #define DST_HOST 0x0001 @@ -367,7 +367,11 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) return child; } -int dst_discard(struct sk_buff *skb); +int dst_discard_sk(struct sock *sk, struct sk_buff *skb); +static inline int dst_discard(struct sk_buff *skb) +{ + return dst_discard_sk(skb->sk, skb); +} void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); void __dst_free(struct dst_entry *dst); @@ -449,9 +453,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) } /* Output packet to network from transport. */ +static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb) +{ + return skb_dst(skb)->output(sk, skb); +} static inline int dst_output(struct sk_buff *skb) { - return skb_dst(skb)->output(skb); + return dst_output_sk(skb->sk, skb); } /* Input packet from network to transport. */ diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index f981ba7adee..74af137304b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -40,7 +40,7 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl); +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c55aeed41ac..7a431388756 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -36,7 +36,7 @@ struct tcp_congestion_ops; * (i.e. things that depend on the address family) */ struct inet_connection_sock_af_ops { - int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl); + int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index 25064c28e05..3ec2b0fb9d8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -104,14 +104,19 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); -int ip_output(struct sk_buff *skb); -int ip_mc_output(struct sk_buff *skb); +int ip_output(struct sock *sk, struct sk_buff *skb); +int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); -int ip_local_out(struct sk_buff *skb); -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl); +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); +static inline int ip_local_out(struct sk_buff *skb) +{ + return ip_local_out_sk(skb->sk, skb); +} + +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3c3bb184eb8..6c4f5eac98e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,11 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* We do not (yet ?) support IPv6 jumbograms (RFC 2675) + * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header + */ +#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) + /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e77c10405d5..a4daf9eb856 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -153,7 +153,7 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, } int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4f541f11ce6..d640925bc45 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -731,7 +731,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, * skb processing functions */ -int ip6_output(struct sk_buff *skb); +int ip6_output(struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index cf2b7ae2b9d..a75fc8e27cd 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -13,6 +13,16 @@ struct nft_cmp_fast_expr { u8 len; }; +/* Calculate the mask for the nft_cmp_fast expression. On big endian the + * mask needs to include the *upper* bytes when interpreting that data as + * something smaller than the full u32, therefore a cpu_to_le32 is done. + */ +static inline u32 nft_cmp_fast_mask(unsigned int len) +{ + return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr, + data) * BITS_PER_BYTE - len)); +} + extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6ee76c80489..d992ca3145f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1653,6 +1653,17 @@ struct sctp_association { /* This is the last advertised value of rwnd over a SACK chunk. */ __u32 a_rwnd; + /* Number of bytes by which the rwnd has slopped. The rwnd is allowed + * to slop over a maximum of the association's frag_point. + */ + __u32 rwnd_over; + + /* Keeps treack of rwnd pressure. This happens when we have + * a window, but not recevie buffer (i.e small packets). This one + * is releases slowly (1 PMTU at a time ). + */ + __u32 rwnd_press; + /* This is the sndbuf size in use for the association. * This corresponds to the sndbuf size for the association, * as specified in the sk->sndbuf. @@ -1881,7 +1892,8 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); -void sctp_assoc_rwnd_update(struct sctp_association *, bool); +void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); +void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 32682ae47b3..116e9c7e19c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -333,7 +333,7 @@ struct xfrm_state_afinfo { const xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); - int (*output)(struct sk_buff *skb); + int (*output)(struct sock *sk, struct sk_buff *skb); int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); @@ -1540,7 +1540,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_output(struct sk_buff *skb); +int xfrm4_output(struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sk_buff *skb); int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1565,7 +1565,7 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_output(struct sk_buff *skb); +int xfrm6_output(struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d8d046c0726..590c3792508 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) { struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); + unsigned long args[6]; sd->nr = syscall_get_nr(task, regs); sd->arch = syscall_get_arch(); - - /* Unroll syscall_get_args to help gcc on arm. */ - syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); - syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); - syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); - syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); - syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); - syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); - + syscall_get_arguments(task, regs, 0, 6, args); + sd->args[0] = args[0]; + sd->args[1] = args[1]; + sd->args[2] = args[2]; + sd->args[3] = args[3]; + sd->args[4] = args[4]; + sd->args[5] = args[5]; sd->instruction_pointer = KSTK_EIP(task); } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d8f6023fd8..bf71b4b2d63 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); diff --git a/net/core/dev.c b/net/core/dev.c index 14dac0654f2..5b3042e69f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2284,7 +2284,7 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; + int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { diff --git a/net/core/dst.c b/net/core/dst.c index ca4231ec734..80d6286c8b6 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -142,12 +142,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard(struct sk_buff *skb) +int dst_discard_sk(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard); +EXPORT_SYMBOL(dst_discard_sk); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard; + dst->output = dst_discard_sk; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst) /* The first case (dev==NULL) is required, when protocol module is unloaded. */ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) - dst->input = dst->output = dst_discard; + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + dst->input = dst_discard; + dst->output = dst_discard_sk; + } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst->output = dst_discard; + dst->input = dst_discard; + dst->output = dst_discard_sk; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/filter.c b/net/core/filter.c index e08b3822c72..cd58614660c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -600,6 +600,9 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) return 0; @@ -618,11 +621,14 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) return 0; nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > A - skb->len) + if (nla->nla_len > skb->len - A) return 0; nla = nla_find_nested(nla, X); @@ -1737,7 +1743,6 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, diff --git a/net/dccp/output.c b/net/dccp/output.c index 8876078859d..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index ce0cbbfe0f4..daccc4a36d8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb) return n->output(n, skb); } -static int dn_output(struct sk_buff *skb) +static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -838,6 +838,18 @@ drop: * Used to catch bugs. This should never normally get * called. */ +static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb) +{ + struct dn_skb_cb *cb = DN_SKB_CB(skb); + + net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); + + kfree_skb(skb); + + return NET_RX_DROP; +} + static int dn_rt_bug(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1463,7 +1475,7 @@ make_route: rt->n = neigh; rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug; + rt->dst.output = dn_rt_bug_sk; switch (res.type) { case RTN_UNICAST: rt->dst.input = dn_forward; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1a0755fea49..1cbeba5edff 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb) skb_dst(skb)->dev, dst_output); } -int ip_local_out(struct sk_buff *skb) +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out); +EXPORT_SYMBOL_GPL(ip_local_out_sk); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -226,9 +226,8 @@ static int ip_finish_output(struct sk_buff *skb) return ip_finish_output2(skb); } -int ip_mc_output(struct sk_buff *skb) +int ip_mc_output(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -287,7 +286,7 @@ int ip_mc_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sk_buff *skb) +int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; @@ -315,9 +314,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) sizeof(fl4->saddr) + sizeof(fl4->daddr)); } -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) +/* Note: skb->sk can be different from sk, in case of tunnels */ +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -389,6 +388,7 @@ packet_routed: ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e77381d1df9..484d0ce27ef 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index e0c2b1d2ea4..bcf206c7900 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -46,7 +46,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { @@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, iph->ttl = ttl; __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); - err = ip_local_out(skb); + err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index f4b19e5dde5..8210964a9f1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; kgid_t low, high; + int ret = 0; inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { kgid_t gid = group_info->blocks[i][j]; if (gid_lte(low, gid) && gid_lte(gid, high)) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34d094cadb1..1485aafcad5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb) dst_set_expires(&rt->dst, 0); } -static int ip_rt_bug(struct sk_buff *skb) +static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, @@ -2218,7 +2218,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; new->dev = ort->dst.dev; if (new->dev) @@ -2357,7 +2357,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, } } else #endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) goto nla_put_failure; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 699fb102e97..025e2509398 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -981,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (likely(err <= 0)) return err; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731f..40e701f2e1e 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb) return xfrm_output(skb); } -int xfrm4_output(struct sk_buff *skb) +int xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c9138189415..d4ade34ab37 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { - struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c98338b81d3..9d921462b57 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1559,6 +1559,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6gre_get_size(const struct net_device *dev) { return @@ -1636,6 +1645,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .validate = ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3284d61577c..40e7581374f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb) return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5015c50a5ba..4011617cca6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -290,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -1058,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1338,7 +1338,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1348,7 +1348,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; @@ -1576,7 +1577,7 @@ int ip6_route_add(struct fib6_config *cfg) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard; + rt->dst.output = dst_discard_sk; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -2128,7 +2129,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2139,7 +2140,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1693c8d885f..8da8268d65f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto out; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, - ttl, df, !net_eq(tunnel->net, dev_net(dev))); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..19ef329bdbf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb) return x->outer_mode->afinfo->output_finish(skb); } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, __xfrm6_output); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 47f7a549055..a4e37d7158d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1131,10 +1131,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) - error = inet6_csk_xmit(skb, NULL); + error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif - error = ip_queue_xmit(skb, fl); + error = ip_queue_xmit(tunnel->sock, skb, fl); /* Update stats */ if (error >= 0) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0b44d855269..3397fe6897c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m xmit: /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, &inet->cork.fl); + rc = ip_queue_xmit(sk, skb, &inet->cork.fl); rcu_read_unlock(); error: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6dba48efe01..75421f2ba8b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1795,6 +1795,7 @@ int nf_conntrack_init_net(struct net *net) int cpu; atomic_set(&net->ct.count, 0); + seqcount_init(&net->ct.generation); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); if (!net->ct.pcpu_lists) diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7bd03decd36..825c3e3f830 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -605,32 +605,14 @@ static struct nf_conntrack_helper pptp __read_mostly = { .expect_policy = &pptp_exp_policy, }; -static void nf_conntrack_pptp_net_exit(struct net *net) -{ - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations nf_conntrack_pptp_net_ops = { - .exit = nf_conntrack_pptp_net_exit, -}; - static int __init nf_conntrack_pptp_init(void) { - int rv; - - rv = nf_conntrack_helper_register(&pptp); - if (rv < 0) - return rv; - rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops); - if (rv < 0) - nf_conntrack_helper_unregister(&pptp); - return rv; + return nf_conntrack_helper_register(&pptp); } static void __exit nf_conntrack_pptp_fini(void) { nf_conntrack_helper_unregister(&pptp); - unregister_pernet_subsys(&nf_conntrack_pptp_net_ops); } module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9d9c0dade60..d5665739e3b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -66,7 +66,7 @@ static inline struct netns_proto_gre *gre_pernet(struct net *net) return net_generic(net, proto_gre_net_id); } -void nf_ct_gre_keymap_flush(struct net *net) +static void nf_ct_gre_keymap_flush(struct net *net) { struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; @@ -78,7 +78,6 @@ void nf_ct_gre_keymap_flush(struct net *net) } write_unlock_bh(&net_gre->keymap_lock); } -EXPORT_SYMBOL(nf_ct_gre_keymap_flush); static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 90998a6ff8b..804105391b9 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask; + u32 mask = nft_cmp_fast_mask(priv->len); - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); if ((data[priv->sreg].data[0] & mask) == priv->data) return; data[NFT_REG_VERDICT].verdict = NFT_BREAK; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 954925db414..e2b3f51c81f 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, BUG_ON(err < 0); desc.len *= BITS_PER_BYTE; - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & mask; priv->len = desc.len; return 0; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index a3d6951602d..ebb6e244255 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - return iptunnel_xmit(rt, skb, fl.saddr, + return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, false); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4f6d6f9d127..39579c3e0d1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1395,35 +1395,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc) return false; } -/* Update asoc's rwnd for the approximated state in the buffer, - * and check whether SACK needs to be sent. - */ -void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) +/* Increase asoc's rwnd by len and send any window update SACK if needed. */ +void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) { - int rx_count; struct sctp_chunk *sack; struct timer_list *timer; - if (asoc->ep->rcvbuf_policy) - rx_count = atomic_read(&asoc->rmem_alloc); - else - rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + if (asoc->rwnd_over) { + if (asoc->rwnd_over >= len) { + asoc->rwnd_over -= len; + } else { + asoc->rwnd += (len - asoc->rwnd_over); + asoc->rwnd_over = 0; + } + } else { + asoc->rwnd += len; + } - if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) - asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; - else - asoc->rwnd = 0; + /* If we had window pressure, start recovering it + * once our rwnd had reached the accumulated pressure + * threshold. The idea is to recover slowly, but up + * to the initial advertised window. + */ + if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { + int change = min(asoc->pathmtu, asoc->rwnd_press); + asoc->rwnd += change; + asoc->rwnd_press -= change; + } - pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n", - __func__, asoc, asoc->rwnd, rx_count, - asoc->base.sk->sk_rcvbuf); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. * The algorithm used is similar to the one described in * Section 4.2.3.3 of RFC 1122. */ - if (update_peer && sctp_peer_needs_update(asoc)) { + if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " @@ -1445,6 +1454,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) } } +/* Decrease asoc's rwnd by len. */ +void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) +{ + int rx_count; + int over = 0; + + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); + + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + /* If we've reached or overflowed our receive buffer, announce + * a 0 rwnd if rwnd would still be positive. Store the + * the potential pressure overflow so that the window can be restored + * back to original value. + */ + if (rx_count >= asoc->base.sk->sk_rcvbuf) + over = 1; + + if (asoc->rwnd >= len) { + asoc->rwnd -= len; + if (over) { + asoc->rwnd_press += asoc->rwnd; + asoc->rwnd = 0; + } + } else { + asoc->rwnd_over = len - asoc->rwnd; + asoc->rwnd = 0; + } + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); +} /* Build the bind address list for the association based on info from the * local endpoint and the remote peer. diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e1d0fcb028..c09757fbf80 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -957,7 +957,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, &transport->fl); + return ip_queue_xmit(&inet->sk, skb, &transport->fl); } static struct sctp_af sctp_af_inet; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 01e002430c8..ae9fbeba40b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * PMTU. In cases, such as loopback, this might be a rather * large spill over. */ - if ((!chunk->data_accepted) && (!asoc->rwnd || + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e13519e9df8..ff20e2dbbbc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2115,6 +2115,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sctp_skb_pull(skb, copied); skb_queue_head(&sk->sk_receive_queue, skb); + /* When only partial message is copied to the user, increase + * rwnd by that amount. If all the data in the skb is read, + * rwnd is updated when the event is freed. + */ + if (!sctp_ulpevent_is_notification(event)) + sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8d198ae0360..85c64658bd0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, skb = sctp_event2skb(event); /* Set the owner and charge rwnd for bytes received. */ sctp_ulpevent_set_owner(event, asoc); - sctp_assoc_rwnd_update(asoc, false); + sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb)); if (!skb->data_len) return; @@ -1011,7 +1011,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) { struct sk_buff *skb, *frag; unsigned int len; - struct sctp_association *asoc; /* Current stack structures assume that the rcv buffer is * per socket. For UDP style sockets this is not true as @@ -1036,11 +1035,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) } done: - asoc = event->asoc; - sctp_association_hold(asoc); + sctp_assoc_rwnd_increase(event->asoc, len); sctp_ulpevent_release_owner(event); - sctp_assoc_rwnd_update(asoc, true); - sctp_association_put(asoc); } static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f02f511b710..c08fbd11cef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1842,7 +1842,7 @@ purge_queue: xfrm_pol_put(pol); } -static int xdst_queue_output(struct sk_buff *skb) +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index d4b601547f1..2458a1dc2ba 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -97,6 +97,14 @@ static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); } +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); + +static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) +{ + if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) + kvm_rtc_eoi_tracking_restore_all(ioapic); +} + static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; @@ -120,9 +128,8 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) } else { __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); ioapic->rtc_status.pending_eoi--; + rtc_status_pending_eoi_check_valid(ioapic); } - - WARN_ON(ioapic->rtc_status.pending_eoi < 0); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) @@ -149,10 +156,10 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) { - if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { --ioapic->rtc_status.pending_eoi; - - WARN_ON(ioapic->rtc_status.pending_eoi < 0); + rtc_status_pending_eoi_check_valid(ioapic); + } } static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) @@ -353,10 +360,16 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) ioapic->irr &= ~(1 << irq); if (irq == RTC_GSI && line_status) { + /* + * pending_eoi cannot ever become negative (see + * rtc_status_pending_eoi_check_valid) and the caller + * ensures that it is only called if it is >= zero, namely + * if rtc_irq_check_coalesced returns false). + */ BUG_ON(ioapic->rtc_status.pending_eoi != 0); ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, ioapic->rtc_status.dest_map); - ioapic->rtc_status.pending_eoi = ret; + ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); } else ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); |