summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile9
-rw-r--r--arch/x86/boot/Makefile4
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/include/asm/checksum_64.h9
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h2
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c18
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c46
-rw-r--r--arch/x86/kernel/cpu/rdrand.c1
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c16
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c82
-rw-r--r--arch/x86/kernel/smp.c2
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/vsmp_64.c17
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c2
-rw-r--r--arch/x86/kvm/vmx.c53
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/msr.c2
-rw-r--r--arch/x86/math-emu/errors.c16
-rw-r--r--arch/x86/net/bpf_jit.S77
-rw-r--r--arch/x86/net/bpf_jit_comp.c1401
-rw-r--r--arch/x86/platform/efi/early_printk.c83
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c2
-rw-r--r--arch/x86/power/hibernate_64.c2
-rw-r--r--arch/x86/syscalls/Makefile2
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/tools/Makefile2
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S19
-rw-r--r--arch/x86/vdso/vdso32-setup.c8
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/irq.c6
-rw-r--r--arch/x86/xen/smp.c3
-rw-r--r--arch/x86/xen/spinlock.c5
-rw-r--r--arch/x86/xen/xen-asm_32.S25
48 files changed, 1139 insertions, 846 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 602f57e590b..33f71b01fd2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -79,11 +79,14 @@ else
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__ -m64
+ biarch := -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
# Don't autogenerate traditional x87, MMX or SSE instructions
- KBUILD_CFLAGS += -mno-mmx -mno-sse -mno-80387 -mno-fp-ret-in-387
+ KBUILD_CFLAGS += -mno-mmx -mno-sse
+ KBUILD_CFLAGS += $(call cc-option,-mno-80387)
+ KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
# Use -mpreferred-stack-boundary=3 if supported.
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
@@ -250,8 +253,8 @@ archclean:
PHONY += kvmconfig
kvmconfig:
$(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
- $(Q)yes "" | $(MAKE) oldconfig
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config
+ $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index abb9eba61b5..dbe8dd2fe24 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
@@ -80,7 +80,7 @@ targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 17684615374..57ab74df7ee 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -354,7 +354,7 @@ static void parse_elf(void *output)
free(phdrs);
}
-asmlinkage void *decompress_kernel(void *rmode, memptr heap,
+asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index e6fd8a026c7..cd00e177449 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -184,8 +184,15 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b)
asm("addl %2,%0\n\t"
"adcl $0,%0"
: "=r" (a)
- : "0" (a), "r" (b));
+ : "0" (a), "rm" (b));
return a;
}
+#define HAVE_ARCH_CSUM_ADD
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+ return (__force __wsum)add32_with_carry((__force unsigned)csum,
+ (__force unsigned)addend);
+}
+
#endif /* _ASM_X86_CHECKSUM_64_H */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index b18df579c0e..36f7125945e 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,6 +63,7 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
+extern int boot_hpet_disable;
extern u8 hpet_blockid;
extern int hpet_force_user;
extern u8 hpet_msi_disable;
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index a8091216963..68c05398bba 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+ ptep_clear_flush(vma, addr, ptep);
}
static inline int huge_pte_none(pte_t pte)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c827ace3121..fcf2b3ae1bf 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -384,7 +384,7 @@
#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
-#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT);
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 3a2ae4c8894..31368207837 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -31,7 +31,7 @@ static char temp_stack[4096];
*
* Wrapper around acpi_enter_sleep_state() to be called by assmebly.
*/
-acpi_status asmlinkage x86_acpi_enter_sleep_state(u8 state)
+acpi_status asmlinkage __visible x86_acpi_enter_sleep_state(u8 state)
{
return acpi_enter_sleep_state(state);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6ad4658de70..992060e0989 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2189,7 +2189,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
cfg->move_in_progress = 0;
}
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void)
return nr_irqs_gsi;
}
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+ return from < nr_irqs_gsi ? nr_irqs_gsi : from;
+}
+
int __init arch_probe_nr_irqs(void)
{
int nr;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index eeee23ff75e..68317c80de7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -598,7 +598,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce m;
int i;
- unsigned long *v;
this_cpu_inc(mce_poll_count);
@@ -618,8 +617,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!(m.status & MCI_STATUS_VAL))
continue;
- v = &get_cpu_var(mce_polled_error);
- set_bit(0, v);
+ this_cpu_write(mce_polled_error, 1);
/*
* Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 3bdb95ae8c4..9a316b21df8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
-static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
+static DEFINE_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
#define CMCI_POLL_INTERVAL (30 * HZ)
@@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void)
int bank;
u64 val;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
owned = __get_cpu_var(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static bool cmci_storm_detect(void)
@@ -211,7 +211,7 @@ static void cmci_discover(int banks)
int i;
int bios_wrong_thresh = 0;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
int bios_zero_thresh = 0;
@@ -266,7 +266,7 @@ static void cmci_discover(int banks)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
@@ -316,10 +316,10 @@ void cmci_clear(void)
if (!cmci_supported(&banks))
return;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++)
__cmci_disable_bank(i);
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void cmci_rediscover_work_func(void *arg)
@@ -360,9 +360,9 @@ void cmci_disable_bank(int bank)
if (!cmci_supported(&banks))
return;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
__cmci_disable_bank(bank);
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void intel_init_cmci(void)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d921b7ee659..36a1bb6d1ee 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -429,14 +429,14 @@ static inline void __smp_thermal_interrupt(void)
smp_thermal_vector();
}
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_thermal_interrupt();
exiting_ack_irq();
}
-asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index fe6b1c86645..7245980186e 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -24,14 +24,14 @@ static inline void __smp_threshold_interrupt(void)
mce_threshold_vector();
}
-asmlinkage void smp_threshold_interrupt(void)
+asmlinkage __visible void smp_threshold_interrupt(void)
{
entering_irq();
__smp_threshold_interrupt();
exiting_ack_irq();
}
-asmlinkage void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void smp_trace_threshold_interrupt(void)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aa333d96688..adb02aa62af 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 059218ed520..619f7699487 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -59,7 +59,7 @@
#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* DRAM */
+#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
/* Clients have PP0, PKG */
@@ -72,6 +72,12 @@
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
+/* Servers have PP0, PKG, RAM, PP1 */
+#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT)
+
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -425,6 +431,24 @@ static struct attribute *rapl_events_cln_attr[] = {
NULL,
};
+static struct attribute *rapl_events_hsw_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_ram),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_ram_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ EVENT_PTR(rapl_ram_scale),
+ NULL,
+};
+
static struct attribute_group rapl_pmu_events_group = {
.name = "events",
.attrs = NULL, /* patched at runtime */
@@ -511,6 +535,7 @@ static int rapl_cpu_prepare(int cpu)
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu);
u64 ms;
+ u64 msr_rapl_power_unit_bits;
if (pmu)
return 0;
@@ -518,6 +543,10 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
+ /* protect rdmsrl() to handle virtualization */
+ if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -1;
@@ -531,8 +560,7 @@ static int rapl_cpu_prepare(int cpu)
*
* we cache in local PMU instance
*/
- rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit);
- pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
+ pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class;
/*
@@ -631,11 +659,14 @@ static int __init rapl_pmu_init(void)
switch (boot_cpu_data.x86_model) {
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
- case 60: /* Haswell */
- case 69: /* Haswell-Celeron */
rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
+ case 60: /* Haswell */
+ case 69: /* Haswell-Celeron */
+ rapl_cntr_mask = RAPL_IDX_HSW;
+ rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
+ break;
case 45: /* Sandy Bridge-EP */
case 62: /* IvyTown */
rapl_cntr_mask = RAPL_IDX_SRV;
@@ -650,7 +681,9 @@ static int __init rapl_pmu_init(void)
cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
- rapl_cpu_prepare(cpu);
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ goto out;
rapl_cpu_init(cpu);
}
@@ -673,6 +706,7 @@ static int __init rapl_pmu_init(void)
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
+out:
cpu_notifier_register_done();
return 0;
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 384df5105fb..136ac74dee8 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -27,6 +27,7 @@
static int __init x86_rdrand_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+ setup_clear_cpu_cap(X86_FEATURE_RDSEED);
return 1;
}
__setup("nordrand", x86_rdrand_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index b0cc3809723..6cda0baeac9 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -17,6 +17,7 @@
#include <asm/dma.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
+#include <asm/hpet.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/irq_remapping.h>
@@ -240,7 +241,7 @@ static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_s
return base;
}
-#define KB(x) ((x) * 1024)
+#define KB(x) ((x) * 1024UL)
#define MB(x) (KB (KB (x)))
#define GB(x) (MB (KB (x)))
@@ -530,6 +531,15 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
}
}
+static void __init force_disable_hpet(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+ boot_hpet_disable = 1;
+ pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
+#endif
+}
+
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -567,6 +577,12 @@ static struct chipset early_qrk[] __initdata = {
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
QFLAG_APPLY_ONCE, intel_graphics_stolen },
+ /*
+ * HPET on current version of Baytrail platform has accuracy
+ * problems, disable it for now:
+ */
+ { PCI_VENDOR_ID_INTEL, 0x0f00,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{}
};
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c61a14a4a31..d6c1b983699 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void)
reserve_ebda_region();
}
-asmlinkage void __init i386_start_kernel(void)
+asmlinkage __visible void __init i386_start_kernel(void)
{
sanitize_boot_params(&boot_params);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 85126ccbdf6..068054f4bf2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
+asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8d80ae01160..4177bfbc80b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -88,7 +88,7 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-static int boot_hpet_disable;
+int boot_hpet_disable;
int hpet_force_user;
static int hpet_verbose;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 79a3f968287..61b17dc2c27 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -897,9 +897,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- switch (kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
+ if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) {
+ /* This must happen on single-stepping */
+ WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
+ kcb->kprobe_status != KPROBE_REENTER);
/*
* We are here because the instruction being single
* stepped caused a page fault. We reset the current
@@ -914,9 +915,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
else
reset_current_kprobe();
preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
+ } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
+ kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
* We increment the nmissed count for accounting,
* we can also use npre/npostfault count for accounting
@@ -945,10 +945,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* fixup routine could not handle it,
* Let do_page_fault() fix it.
*/
- break;
- default:
- break;
}
+
return 0;
}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebd..dcbbaa165bd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,6 +20,8 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+int sysctl_ldt16 = 0;
+
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
{
@@ -234,7 +236,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
* IRET leaking the high bits of the kernel stack address.
*/
#ifdef CONFIG_X86_64
- if (!ldt_info.seg_32bit) {
+ if (!ldt_info.seg_32bit && !sysctl_ldt16) {
error = -EINVAL;
goto out_unlock;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f93d0..898d077617a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, old_rsp);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 654b46574b9..52b1157c53e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -114,8 +114,8 @@ EXPORT_SYMBOL(machine_real_restart);
*/
static int __init set_pci_reboot(const struct dmi_system_id *d)
{
- if (reboot_type != BOOT_CF9) {
- reboot_type = BOOT_CF9;
+ if (reboot_type != BOOT_CF9_FORCE) {
+ reboot_type = BOOT_CF9_FORCE;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
d->ident, "PCI");
}
@@ -191,6 +191,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
},
+ /* Certec */
+ { /* Handle problems with rebooting on Certec BPC600 */
+ .callback = set_pci_reboot,
+ .ident = "Certec BPC600",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Certec"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "BPC600"),
+ },
+ },
+
/* Dell */
{ /* Handle problems with rebooting on Dell DXP061 */
.callback = set_bios_reboot,
@@ -458,20 +468,23 @@ void __attribute__((weak)) mach_reboot_fixups(void)
}
/*
- * Windows compatible x86 hardware expects the following on reboot:
+ * To the best of our knowledge Windows compatible x86 hardware expects
+ * the following on reboot:
*
* 1) If the FADT has the ACPI reboot register flag set, try it
* 2) If still alive, write to the keyboard controller
* 3) If still alive, write to the ACPI reboot register again
* 4) If still alive, write to the keyboard controller again
* 5) If still alive, call the EFI runtime service to reboot
- * 6) If still alive, write to the PCI IO port 0xCF9 to reboot
- * 7) If still alive, inform BIOS to do a proper reboot
+ * 6) If no EFI runtime service, call the BIOS to do a reboot
+ *
+ * We default to following the same pattern. We also have
+ * two other reboot methods: 'triple fault' and 'PCI', which
+ * can be triggered via the reboot= kernel boot option or
+ * via quirks.
*
- * If the machine is still alive at this stage, it gives up. We default to
- * following the same pattern, except that if we're still alive after (7) we'll
- * try to force a triple fault and then cycle between hitting the keyboard
- * controller and doing that
+ * This means that this function can never return, it can misbehave
+ * by not rebooting properly and hanging.
*/
static void native_machine_emergency_restart(void)
{
@@ -492,6 +505,11 @@ static void native_machine_emergency_restart(void)
for (;;) {
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
+ case BOOT_ACPI:
+ acpi_reboot();
+ reboot_type = BOOT_KBD;
+ break;
+
case BOOT_KBD:
mach_reboot_fixups(); /* For board specific fixups */
@@ -509,43 +527,29 @@ static void native_machine_emergency_restart(void)
}
break;
- case BOOT_TRIPLE:
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_KBD;
- break;
-
- case BOOT_BIOS:
- machine_real_restart(MRR_BIOS);
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_TRIPLE;
- break;
-
- case BOOT_ACPI:
- acpi_reboot();
- reboot_type = BOOT_KBD;
- break;
-
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
- reboot_type = BOOT_CF9_COND;
+ reboot_type = BOOT_BIOS;
+ break;
+
+ case BOOT_BIOS:
+ machine_real_restart(MRR_BIOS);
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_CF9_SAFE;
break;
- case BOOT_CF9:
+ case BOOT_CF9_FORCE:
port_cf9_safe = true;
/* Fall through */
- case BOOT_CF9_COND:
+ case BOOT_CF9_SAFE:
if (port_cf9_safe) {
- u8 reboot_code = reboot_mode == REBOOT_WARM ?
- 0x06 : 0x0E;
+ u8 reboot_code = reboot_mode == REBOOT_WARM ? 0x06 : 0x0E;
u8 cf9 = inb(0xcf9) & ~reboot_code;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
@@ -553,7 +557,15 @@ static void native_machine_emergency_restart(void)
outb(cf9|reboot_code, 0xcf9);
udelay(50);
}
- reboot_type = BOOT_BIOS;
+ reboot_type = BOOT_TRIPLE;
+ break;
+
+ case BOOT_TRIPLE:
+ load_idt(&no_idt);
+ __asm__ __volatile__("int3");
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_KBD;
break;
}
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7c3a5a61f2e..be8e1bde07a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -168,7 +168,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
* this function calls the 'stop' function on all other CPUs in the system.
*/
-asmlinkage void smp_reboot_interrupt(void)
+asmlinkage __visible void smp_reboot_interrupt(void)
{
ack_APIC_irq();
irq_enter();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 57409f6b8c6..f73b5d435bd 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -357,7 +357,7 @@ exit:
* for scheduling or signal handling. The actual stack switch is done in
* entry.S
*/
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = eregs;
/* Did already sync */
@@ -601,11 +601,11 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
#endif
}
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
-asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
+asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index f6584a90aba..b99b9ad8540 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,6 +26,9 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10
+/* Flag below is initialized once during vSMP PCI initialization. */
+static int irq_routing_comply = 1;
+
#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
/*
* Interrupt control on vSMPowered systems:
@@ -33,7 +36,7 @@
* and vice versa.
*/
-asmlinkage unsigned long vsmp_save_fl(void)
+asmlinkage __visible unsigned long vsmp_save_fl(void)
{
unsigned long flags = native_save_fl();
@@ -53,7 +56,7 @@ __visible void vsmp_restore_fl(unsigned long flags)
}
PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
-asmlinkage void vsmp_irq_disable(void)
+asmlinkage __visible void vsmp_irq_disable(void)
{
unsigned long flags = native_save_fl();
@@ -61,7 +64,7 @@ asmlinkage void vsmp_irq_disable(void)
}
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
-asmlinkage void vsmp_irq_enable(void)
+asmlinkage __visible void vsmp_irq_enable(void)
{
unsigned long flags = native_save_fl();
@@ -101,6 +104,10 @@ static void __init set_vsmp_pv_ops(void)
#ifdef CONFIG_SMP
if (cap & ctl & BIT(8)) {
ctl &= ~BIT(8);
+
+ /* Interrupt routing set to ignore */
+ irq_routing_comply = 0;
+
#ifdef CONFIG_PROC_FS
/* Don't let users change irq affinity via procfs */
no_irq_affinity = 1;
@@ -218,7 +225,9 @@ static void vsmp_apic_post_init(void)
{
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
- apic->vector_allocation_domain = fill_vector_allocation_domain;
+
+ if (!irq_routing_comply)
+ apic->vector_allocation_domain = fill_vector_allocation_domain;
}
void __init vsmp_init(void)
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index f9c6e56e14b..9531fbb123b 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -43,7 +43,7 @@ void update_vsyscall(struct timekeeper *tk)
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->xtime_nsec
- + (tk->wall_to_monotonic.tv_nsec
+ + ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->shift)) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f68c583192..33e8c028842 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -503,7 +503,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
[number##_HIGH] = VMCS12_OFFSET(name)+4
-static const unsigned long shadow_read_only_fields[] = {
+static unsigned long shadow_read_only_fields[] = {
/*
* We do NOT shadow fields that are modified when L0
* traps and emulates any vmx instruction (e.g. VMPTRLD,
@@ -526,10 +526,10 @@ static const unsigned long shadow_read_only_fields[] = {
GUEST_LINEAR_ADDRESS,
GUEST_PHYSICAL_ADDRESS
};
-static const int max_shadow_read_only_fields =
+static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static const unsigned long shadow_read_write_fields[] = {
+static unsigned long shadow_read_write_fields[] = {
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
@@ -558,7 +558,7 @@ static const unsigned long shadow_read_write_fields[] = {
HOST_FS_SELECTOR,
HOST_GS_SELECTOR
};
-static const int max_shadow_read_write_fields =
+static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
static const unsigned short vmcs_field_to_offset_table[] = {
@@ -3009,6 +3009,41 @@ static void free_kvm_area(void)
}
}
+static void init_vmcs_shadow_fields(void)
+{
+ int i, j;
+
+ /* No checks for read only fields yet */
+
+ for (i = j = 0; i < max_shadow_read_write_fields; i++) {
+ switch (shadow_read_write_fields[i]) {
+ case GUEST_BNDCFGS:
+ if (!vmx_mpx_supported())
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ if (j < i)
+ shadow_read_write_fields[j] =
+ shadow_read_write_fields[i];
+ j++;
+ }
+ max_shadow_read_write_fields = j;
+
+ /* shadowed fields guest access without vmexit */
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ clear_bit(shadow_read_write_fields[i],
+ vmx_vmwrite_bitmap);
+ clear_bit(shadow_read_write_fields[i],
+ vmx_vmread_bitmap);
+ }
+ for (i = 0; i < max_shadow_read_only_fields; i++)
+ clear_bit(shadow_read_only_fields[i],
+ vmx_vmread_bitmap);
+}
+
static __init int alloc_kvm_area(void)
{
int cpu;
@@ -3039,6 +3074,8 @@ static __init int hardware_setup(void)
enable_vpid = 0;
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels()) {
@@ -8803,14 +8840,6 @@ static int __init vmx_init(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /* shadowed read/write fields */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
- clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
- }
- /* shadowed read only fields */
- for (i = 0; i < max_shadow_read_only_fields; i++)
- clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
/*
* Allow direct access to the PC debug port (it is often used for I/O
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b8fc0b792b..b6c0bacca9b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -280,7 +280,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
-asmlinkage void kvm_spurious_fault(void)
+asmlinkage __visible void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ad1fb5f5392..aae94132bc2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next)
* flags word contains all kind of stuff, but in practice Linux only cares
* about the interrupt flag. Our "save_flags()" just returns that.
*/
-asmlinkage unsigned long lguest_save_fl(void)
+asmlinkage __visible unsigned long lguest_save_fl(void)
{
return lguest_data.irq_enabled;
}
/* Interrupts go off... */
-asmlinkage void lguest_irq_disable(void)
+asmlinkage __visible void lguest_irq_disable(void)
{
lguest_data.irq_enabled = 0;
}
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index db9db446b71..43623739c7c 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -76,7 +76,7 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set)
if (m1.q == m.q)
return 0;
- err = msr_write(msr, &m);
+ err = msr_write(msr, &m1);
if (err)
return err;
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index a5449089cd9..9e6545f269e 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -302,7 +302,7 @@ static struct {
0x242 in div_Xsig.S
*/
-asmlinkage void FPU_exception(int n)
+asmlinkage __visible void FPU_exception(int n)
{
int i, int_type;
@@ -492,7 +492,7 @@ int real_2op_NaN(FPU_REG const *b, u_char tagb,
/* Invalid arith operation on Valid registers */
/* Returns < 0 if the exception is unmasked */
-asmlinkage int arith_invalid(int deststnr)
+asmlinkage __visible int arith_invalid(int deststnr)
{
EXCEPTION(EX_Invalid);
@@ -507,7 +507,7 @@ asmlinkage int arith_invalid(int deststnr)
}
/* Divide a finite number by zero */
-asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
+asmlinkage __visible int FPU_divide_by_zero(int deststnr, u_char sign)
{
FPU_REG *dest = &st(deststnr);
int tag = TAG_Valid;
@@ -539,7 +539,7 @@ int set_precision_flag(int flags)
}
/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_up(void)
+asmlinkage __visible void set_precision_flag_up(void)
{
if (control_word & CW_Precision)
partial_status |= (SW_Precision | SW_C1); /* The masked response */
@@ -548,7 +548,7 @@ asmlinkage void set_precision_flag_up(void)
}
/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_down(void)
+asmlinkage __visible void set_precision_flag_down(void)
{
if (control_word & CW_Precision) { /* The masked response */
partial_status &= ~SW_C1;
@@ -557,7 +557,7 @@ asmlinkage void set_precision_flag_down(void)
EXCEPTION(EX_Precision);
}
-asmlinkage int denormal_operand(void)
+asmlinkage __visible int denormal_operand(void)
{
if (control_word & CW_Denormal) { /* The masked response */
partial_status |= SW_Denorm_Op;
@@ -568,7 +568,7 @@ asmlinkage int denormal_operand(void)
}
}
-asmlinkage int arith_overflow(FPU_REG *dest)
+asmlinkage __visible int arith_overflow(FPU_REG *dest)
{
int tag = TAG_Valid;
@@ -596,7 +596,7 @@ asmlinkage int arith_overflow(FPU_REG *dest)
}
-asmlinkage int arith_underflow(FPU_REG *dest)
+asmlinkage __visible int arith_underflow(FPU_REG *dest)
{
int tag = TAG_Valid;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 01495755701..6440221ced0 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
/*
* Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
* esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8 : copy of skb->data
+ * r10 : copy of skb->data
* r9d : hlen = skb->len - skb->data_len
*/
-#define SKBDATA %r8
+#define SKBDATA %r10
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+ 32 /* space for rbx,r13,r14,r15 */ + \
+ 8 /* space for skb_copy_bits */)
sk_load_word:
.globl sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
movzbl (SKBDATA,%rsi),%eax
ret
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
- .globl sk_load_byte_msh
- test %esi,%esi
- js bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
- .globl sk_load_byte_msh_positive_offset
- cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
- jle bpf_slow_path_byte_msh
- movzbl (SKBDATA,%rsi),%ebx
- and $15,%bl
- shl $2,%bl
- ret
-
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
- push %rdi; /* save skb */ \
+ mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
mov $LEN,%ecx; /* len */ \
- lea -12(%rbp),%rdx; \
+ lea - MAX_BPF_STACK + 32(%rbp),%rdx; \
call skb_copy_bits; \
test %eax,%eax; \
pop SKBDATA; \
- pop %r9; \
- pop %rdi
+ pop %r9;
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
- mov -12(%rbp),%eax
+ mov - MAX_BPF_STACK + 32(%rbp),%eax
bswap %eax
ret
bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
- mov -12(%rbp),%ax
+ mov - MAX_BPF_STACK + 32(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
- movzbl -12(%rbp),%eax
- ret
-
-bpf_slow_path_byte_msh:
- xchg %eax,%ebx /* dont lose A , X is about to be scratched */
- bpf_slow_path_common(1)
- js bpf_error
- movzbl -12(%rbp),%eax
- and $15,%al
- shl $2,%al
- xchg %eax,%ebx
+ movzbl - MAX_BPF_STACK + 32(%rbp),%eax
ret
#define sk_negative_common(SIZE) \
- push %rdi; /* save skb */ \
+ mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
test %rax,%rax; \
pop SKBDATA; \
pop %r9; \
- pop %rdi; \
jz bpf_error
-
bpf_slow_path_word_neg:
cmp SKF_MAX_NEG_OFF, %esi /* test range */
jl bpf_error /* offset lower -> error */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
movzbl (%rax), %eax
ret
-bpf_slow_path_byte_msh_neg:
- cmp SKF_MAX_NEG_OFF, %esi
- jl bpf_error
-sk_load_byte_msh_negative_offset:
- .globl sk_load_byte_msh_negative_offset
- xchg %eax,%ebx /* dont lose A , X is about to be scratched */
- sk_negative_common(1)
- movzbl (%rax),%eax
- and $15,%al
- shl $2,%al
- xchg %eax,%ebx
- ret
-
bpf_error:
# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
+ xor %eax,%eax
+ mov - MAX_BPF_STACK(%rbp),%rbx
+ mov - MAX_BPF_STACK + 8(%rbp),%r13
+ mov - MAX_BPF_STACK + 16(%rbp),%r14
+ mov - MAX_BPF_STACK + 24(%rbp),%r15
leaveq
ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index dc017735bb9..080f3f071bb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,7 @@
/* bpf_jit_comp.c : BPF JIT compiler
*
* Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
+ * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,28 +15,16 @@
#include <linux/if_vlan.h>
#include <linux/random.h>
-/*
- * Conventions :
- * EAX : BPF A accumulator
- * EBX : BPF X accumulator
- * RDI : pointer to skb (first argument given to JIT function)
- * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
- * ECX,EDX,ESI : scratch registers
- * r9d : skb->len - skb->data_len (headlen)
- * r8 : skb->data
- * -8(RBP) : saved RBX value
- * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
- */
int bpf_jit_enable __read_mostly;
/*
* assembly code in arch/x86/net/bpf_jit.S
*/
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_byte_positive_offset[];
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
+extern u8 sk_load_byte_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
-#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0)
-
-#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
-#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+#define EMIT1_off32(b1, off) \
+ do {EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+ do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+ do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+ do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
static inline bool is_imm8(int value)
{
return value <= 127 && value >= -128;
}
-static inline bool is_near(int offset)
+static inline bool is_simm32(s64 value)
{
- return offset <= 127 && offset >= -128;
+ return value == (s64) (s32) value;
}
-#define EMIT_JMP(offset) \
-do { \
- if (offset) { \
- if (is_near(offset)) \
- EMIT2(0xeb, offset); /* jmp .+off8 */ \
- else \
- EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \
- } \
-} while (0)
+/* mov A, X */
+#define EMIT_mov(A, X) \
+ do {if (A != X) \
+ EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
+ } while (0)
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+ if (bpf_size == BPF_W)
+ return 4;
+ else if (bpf_size == BPF_H)
+ return 2;
+ else if (bpf_size == BPF_B)
+ return 1;
+ else if (bpf_size == BPF_DW)
+ return 4; /* imm32 */
+ else
+ return 0;
+}
/* list of x86 cond jumps opcodes (. + s8)
* Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
@@ -90,27 +93,8 @@ do { \
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA 0x77
-
-#define EMIT_COND_JMP(op, offset) \
-do { \
- if (is_near(offset)) \
- EMIT2(op, offset); /* jxx .+off8 */ \
- else { \
- EMIT2(0x0f, op + 0x10); \
- EMIT(offset, 4); /* jxx .+off32 */ \
- } \
-} while (0)
-
-#define COND_SEL(CODE, TOP, FOP) \
- case CODE: \
- t_op = TOP; \
- f_op = FOP; \
- goto cond_branch
-
-
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG 2 /* ebx is used */
-#define SEEN_MEM 4 /* use mem[] for temporary storage */
+#define X86_JGE 0x7D
+#define X86_JG 0x7F
static inline void bpf_flush_icache(void *start, void *end)
{
@@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end)
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-/* Helper to find the offset of pkt_type in sk_buff
- * We want to make sure its still a 3bit field starting at a byte boundary.
- */
-#define PKT_TYPE_MAX 7
-static int pkt_type_offset(void)
-{
- struct sk_buff skb_probe = {
- .pkt_type = ~0,
- };
- char *ct = (char *)&skb_probe;
- unsigned int off;
-
- for (off = 0; off < sizeof(struct sk_buff); off++) {
- if (ct[off] == PKT_TYPE_MAX)
- return off;
- }
- pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
- return -1;
-}
-
struct bpf_binary_header {
unsigned int pages;
/* Note : for security reasons, bpf code will follow a randomly
@@ -171,590 +135,778 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
header->pages = sz / PAGE_SIZE;
- hole = sz - (proglen + sizeof(*header));
+ hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header));
/* insert a random number of int3 instructions before BPF code */
*image_ptr = &header->image[prandom_u32() % hole];
return header;
}
-void bpf_jit_compile(struct sk_filter *fp)
+/* pick a register outside of BPF range for JIT internal work */
+#define AUX_REG (MAX_BPF_REG + 1)
+
+/* the following table maps BPF registers to x64 registers.
+ * x64 register r12 is unused, since if used as base address register
+ * in load/store instructions, it always needs an extra byte of encoding
+ */
+static const int reg2hex[] = {
+ [BPF_REG_0] = 0, /* rax */
+ [BPF_REG_1] = 7, /* rdi */
+ [BPF_REG_2] = 6, /* rsi */
+ [BPF_REG_3] = 2, /* rdx */
+ [BPF_REG_4] = 1, /* rcx */
+ [BPF_REG_5] = 0, /* r8 */
+ [BPF_REG_6] = 3, /* rbx callee saved */
+ [BPF_REG_7] = 5, /* r13 callee saved */
+ [BPF_REG_8] = 6, /* r14 callee saved */
+ [BPF_REG_9] = 7, /* r15 callee saved */
+ [BPF_REG_FP] = 5, /* rbp readonly */
+ [AUX_REG] = 3, /* r11 temp register */
+};
+
+/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+ * which need extra byte of encoding.
+ * rax,rcx,...,rbp have simpler encoding
+ */
+static inline bool is_ereg(u32 reg)
{
- u8 temp[64];
- u8 *prog;
- unsigned int proglen, oldproglen = 0;
- int ilen, i;
- int t_offset, f_offset;
- u8 t_op, f_op, seen = 0, pass;
- u8 *image = NULL;
- struct bpf_binary_header *header = NULL;
- u8 *func;
- int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
- unsigned int cleanup_addr; /* epilogue code offset */
- unsigned int *addrs;
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
+ if (reg == BPF_REG_5 || reg == AUX_REG ||
+ (reg >= BPF_REG_7 && reg <= BPF_REG_9))
+ return true;
+ else
+ return false;
+}
- if (!bpf_jit_enable)
- return;
+/* add modifiers if 'reg' maps to x64 registers r8..r15 */
+static inline u8 add_1mod(u8 byte, u32 reg)
+{
+ if (is_ereg(reg))
+ byte |= 1;
+ return byte;
+}
- addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return;
+static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
+{
+ if (is_ereg(r1))
+ byte |= 1;
+ if (is_ereg(r2))
+ byte |= 4;
+ return byte;
+}
- /* Before first pass, make a rough estimation of addrs[]
- * each bpf instruction is translated to less than 64 bytes
+/* encode dest register 'a_reg' into x64 opcode 'byte' */
+static inline u8 add_1reg(u8 byte, u32 a_reg)
+{
+ return byte + reg2hex[a_reg];
+}
+
+/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */
+static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
+{
+ return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
+}
+
+struct jit_context {
+ unsigned int cleanup_addr; /* epilogue code offset */
+ bool seen_ld_abs;
+};
+
+static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
+ int oldproglen, struct jit_context *ctx)
+{
+ struct sock_filter_int *insn = bpf_prog->insnsi;
+ int insn_cnt = bpf_prog->len;
+ u8 temp[64];
+ int i;
+ int proglen = 0;
+ u8 *prog = temp;
+ int stacksize = MAX_BPF_STACK +
+ 32 /* space for rbx, r13, r14, r15 */ +
+ 8 /* space for skb_copy_bits() buffer */;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
+
+ /* sub rsp, stacksize */
+ EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
+
+ /* all classic BPF filters use R6(rbx) save it */
+
+ /* mov qword ptr [rbp-X],rbx */
+ EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
+
+ /* sk_convert_filter() maps classic BPF register X to R7 and uses R8
+ * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
+ * R8(r14). R9(r15) spill could be made conditional, but there is only
+ * one 'bpf_error' return path out of helper functions inside bpf_jit.S
+ * The overhead of extra spill is negligible for any filter other
+ * than synthetic ones. Therefore not worth adding complexity.
*/
- for (proglen = 0, i = 0; i < flen; i++) {
- proglen += 64;
- addrs[i] = proglen;
+
+ /* mov qword ptr [rbp-X],r13 */
+ EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
+ /* mov qword ptr [rbp-X],r14 */
+ EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
+ /* mov qword ptr [rbp-X],r15 */
+ EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
+
+ /* clear A and X registers */
+ EMIT2(0x31, 0xc0); /* xor eax, eax */
+ EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+
+ if (ctx->seen_ld_abs) {
+ /* r9d : skb->len - skb->data_len (headlen)
+ * r10 : skb->data
+ */
+ if (is_imm8(offsetof(struct sk_buff, len)))
+ /* mov %r9d, off8(%rdi) */
+ EMIT4(0x44, 0x8b, 0x4f,
+ offsetof(struct sk_buff, len));
+ else
+ /* mov %r9d, off32(%rdi) */
+ EMIT3_off32(0x44, 0x8b, 0x8f,
+ offsetof(struct sk_buff, len));
+
+ if (is_imm8(offsetof(struct sk_buff, data_len)))
+ /* sub %r9d, off8(%rdi) */
+ EMIT4(0x44, 0x2b, 0x4f,
+ offsetof(struct sk_buff, data_len));
+ else
+ EMIT3_off32(0x44, 0x2b, 0x8f,
+ offsetof(struct sk_buff, data_len));
+
+ if (is_imm8(offsetof(struct sk_buff, data)))
+ /* mov %r10, off8(%rdi) */
+ EMIT4(0x4c, 0x8b, 0x57,
+ offsetof(struct sk_buff, data));
+ else
+ /* mov %r10, off32(%rdi) */
+ EMIT3_off32(0x4c, 0x8b, 0x97,
+ offsetof(struct sk_buff, data));
}
- cleanup_addr = proglen; /* epilogue address */
- for (pass = 0; pass < 10; pass++) {
- u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
- /* no prologue/epilogue for trivial filters (RET something) */
- proglen = 0;
- prog = temp;
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ const s32 K = insn->imm;
+ u32 a_reg = insn->a_reg;
+ u32 x_reg = insn->x_reg;
+ u8 b1 = 0, b2 = 0, b3 = 0;
+ s64 jmp_offset;
+ u8 jmp_cond;
+ int ilen;
+ u8 *func;
+
+ switch (insn->code) {
+ /* ALU */
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ switch (BPF_OP(insn->code)) {
+ case BPF_ADD: b2 = 0x01; break;
+ case BPF_SUB: b2 = 0x29; break;
+ case BPF_AND: b2 = 0x21; break;
+ case BPF_OR: b2 = 0x09; break;
+ case BPF_XOR: b2 = 0x31; break;
+ }
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ EMIT1(add_2mod(0x48, a_reg, x_reg));
+ else if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT1(add_2mod(0x40, a_reg, x_reg));
+ EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
+ break;
- if (seen_or_pass0) {
- EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
- EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
- /* note : must save %rbx in case bpf_error is hit */
- if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
- EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
- if (seen_or_pass0 & SEEN_XREG)
- CLEAR_X(); /* make sure we dont leek kernel memory */
-
- /*
- * If this filter needs to access skb data,
- * loads r9 and r8 with :
- * r9 = skb->len - skb->data_len
- * r8 = skb->data
+ /* mov A, X */
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ EMIT_mov(a_reg, x_reg);
+ break;
+
+ /* mov32 A, X */
+ case BPF_ALU | BPF_MOV | BPF_X:
+ if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT1(add_2mod(0x40, a_reg, x_reg));
+ EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
+ break;
+
+ /* neg A */
+ case BPF_ALU | BPF_NEG:
+ case BPF_ALU64 | BPF_NEG:
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ EMIT1(add_1mod(0x48, a_reg));
+ else if (is_ereg(a_reg))
+ EMIT1(add_1mod(0x40, a_reg));
+ EMIT2(0xF7, add_1reg(0xD8, a_reg));
+ break;
+
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ EMIT1(add_1mod(0x48, a_reg));
+ else if (is_ereg(a_reg))
+ EMIT1(add_1mod(0x40, a_reg));
+
+ switch (BPF_OP(insn->code)) {
+ case BPF_ADD: b3 = 0xC0; break;
+ case BPF_SUB: b3 = 0xE8; break;
+ case BPF_AND: b3 = 0xE0; break;
+ case BPF_OR: b3 = 0xC8; break;
+ case BPF_XOR: b3 = 0xF0; break;
+ }
+
+ if (is_imm8(K))
+ EMIT3(0x83, add_1reg(b3, a_reg), K);
+ else
+ EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
+ break;
+
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ /* optimization: if imm32 is positive,
+ * use 'mov eax, imm32' (which zero-extends imm32)
+ * to save 2 bytes
*/
- if (seen_or_pass0 & SEEN_DATAREF) {
- if (offsetof(struct sk_buff, len) <= 127)
- /* mov off8(%rdi),%r9d */
- EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
- else {
- /* mov off32(%rdi),%r9d */
- EMIT3(0x44, 0x8b, 0x8f);
- EMIT(offsetof(struct sk_buff, len), 4);
- }
- if (is_imm8(offsetof(struct sk_buff, data_len)))
- /* sub off8(%rdi),%r9d */
- EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
- else {
- EMIT3(0x44, 0x2b, 0x8f);
- EMIT(offsetof(struct sk_buff, data_len), 4);
- }
+ if (K < 0) {
+ /* 'mov rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, a_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
+ break;
+ }
- if (is_imm8(offsetof(struct sk_buff, data)))
- /* mov off8(%rdi),%r8 */
- EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
- else {
- /* mov off32(%rdi),%r8 */
- EMIT3(0x4c, 0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, data), 4);
- }
+ case BPF_ALU | BPF_MOV | BPF_K:
+ /* mov %eax, imm32 */
+ if (is_ereg(a_reg))
+ EMIT1(add_1mod(0x40, a_reg));
+ EMIT1_off32(add_1reg(0xB8, a_reg), K);
+ break;
+
+ /* A %= X, A /= X, A %= K, A /= K */
+ case BPF_ALU | BPF_MOD | BPF_X:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x52); /* push rdx */
+
+ if (BPF_SRC(insn->code) == BPF_X)
+ /* mov r11, X */
+ EMIT_mov(AUX_REG, x_reg);
+ else
+ /* mov r11, K */
+ EMIT3_off32(0x49, 0xC7, 0xC3, K);
+
+ /* mov rax, A */
+ EMIT_mov(BPF_REG_0, a_reg);
+
+ /* xor edx, edx
+ * equivalent to 'xor rdx, rdx', but one byte less
+ */
+ EMIT2(0x31, 0xd2);
+
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* if (X == 0) return 0 */
+
+ /* cmp r11, 0 */
+ EMIT4(0x49, 0x83, 0xFB, 0x00);
+
+ /* jne .+9 (skip over pop, pop, xor and jmp) */
+ EMIT2(X86_JNE, 1 + 1 + 2 + 5);
+ EMIT1(0x5A); /* pop rdx */
+ EMIT1(0x58); /* pop rax */
+ EMIT2(0x31, 0xc0); /* xor eax, eax */
+
+ /* jmp cleanup_addr
+ * addrs[i] - 11, because there are 11 bytes
+ * after this insn: div, mov, pop, pop, mov
+ */
+ jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
+ EMIT1_off32(0xE9, jmp_offset);
}
- }
- switch (filter[0].code) {
- case BPF_S_RET_K:
- case BPF_S_LD_W_LEN:
- case BPF_S_ANC_PROTOCOL:
- case BPF_S_ANC_IFINDEX:
- case BPF_S_ANC_MARK:
- case BPF_S_ANC_RXHASH:
- case BPF_S_ANC_CPU:
- case BPF_S_ANC_VLAN_TAG:
- case BPF_S_ANC_VLAN_TAG_PRESENT:
- case BPF_S_ANC_QUEUE:
- case BPF_S_ANC_PKTTYPE:
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
- /* first instruction sets A register (or is RET 'constant') */
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ /* div r11 */
+ EMIT3(0x49, 0xF7, 0xF3);
+ else
+ /* div r11d */
+ EMIT3(0x41, 0xF7, 0xF3);
+
+ if (BPF_OP(insn->code) == BPF_MOD)
+ /* mov r11, rdx */
+ EMIT3(0x49, 0x89, 0xD3);
+ else
+ /* mov r11, rax */
+ EMIT3(0x49, 0x89, 0xC3);
+
+ EMIT1(0x5A); /* pop rdx */
+ EMIT1(0x58); /* pop rax */
+
+ /* mov A, r11 */
+ EMIT_mov(a_reg, AUX_REG);
break;
- default:
- /* make sure we dont leak kernel information to user */
- CLEAR_A(); /* A = 0 */
- }
- for (i = 0; i < flen; i++) {
- unsigned int K = filter[i].k;
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x52); /* push rdx */
+
+ /* mov r11, A */
+ EMIT_mov(AUX_REG, a_reg);
+
+ if (BPF_SRC(insn->code) == BPF_X)
+ /* mov rax, X */
+ EMIT_mov(BPF_REG_0, x_reg);
+ else
+ /* mov rax, K */
+ EMIT3_off32(0x48, 0xC7, 0xC0, K);
+
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ EMIT1(add_1mod(0x48, AUX_REG));
+ else if (is_ereg(AUX_REG))
+ EMIT1(add_1mod(0x40, AUX_REG));
+ /* mul(q) r11 */
+ EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
+
+ /* mov r11, rax */
+ EMIT_mov(AUX_REG, BPF_REG_0);
+
+ EMIT1(0x5A); /* pop rdx */
+ EMIT1(0x58); /* pop rax */
+
+ /* mov A, r11 */
+ EMIT_mov(a_reg, AUX_REG);
+ break;
- switch (filter[i].code) {
- case BPF_S_ALU_ADD_X: /* A += X; */
- seen |= SEEN_XREG;
- EMIT2(0x01, 0xd8); /* add %ebx,%eax */
- break;
- case BPF_S_ALU_ADD_K: /* A += K; */
- if (!K)
- break;
- if (is_imm8(K))
- EMIT3(0x83, 0xc0, K); /* add imm8,%eax */
- else
- EMIT1_off32(0x05, K); /* add imm32,%eax */
- break;
- case BPF_S_ALU_SUB_X: /* A -= X; */
- seen |= SEEN_XREG;
- EMIT2(0x29, 0xd8); /* sub %ebx,%eax */
- break;
- case BPF_S_ALU_SUB_K: /* A -= K */
- if (!K)
- break;
- if (is_imm8(K))
- EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
- else
- EMIT1_off32(0x2d, K); /* sub imm32,%eax */
- break;
- case BPF_S_ALU_MUL_X: /* A *= X; */
- seen |= SEEN_XREG;
- EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */
- break;
- case BPF_S_ALU_MUL_K: /* A *= K */
- if (is_imm8(K))
- EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
- else {
- EMIT2(0x69, 0xc0); /* imul imm32,%eax */
- EMIT(K, 4);
- }
- break;
- case BPF_S_ALU_DIV_X: /* A /= X; */
- seen |= SEEN_XREG;
- EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
- if (pc_ret0 > 0) {
- /* addrs[pc_ret0 - 1] is start address of target
- * (addrs[i] - 4) is the address following this jmp
- * ("xor %edx,%edx; div %ebx" being 4 bytes long)
- */
- EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
- (addrs[i] - 4));
- } else {
- EMIT_COND_JMP(X86_JNE, 2 + 5);
- CLEAR_A();
- EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
- }
- EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
- break;
- case BPF_S_ALU_MOD_X: /* A %= X; */
- seen |= SEEN_XREG;
- EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
- if (pc_ret0 > 0) {
- /* addrs[pc_ret0 - 1] is start address of target
- * (addrs[i] - 6) is the address following this jmp
- * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
- */
- EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
- (addrs[i] - 6));
- } else {
- EMIT_COND_JMP(X86_JNE, 2 + 5);
- CLEAR_A();
- EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
- }
- EMIT2(0x31, 0xd2); /* xor %edx,%edx */
- EMIT2(0xf7, 0xf3); /* div %ebx */
- EMIT2(0x89, 0xd0); /* mov %edx,%eax */
- break;
- case BPF_S_ALU_MOD_K: /* A %= K; */
- if (K == 1) {
- CLEAR_A();
- break;
- }
- EMIT2(0x31, 0xd2); /* xor %edx,%edx */
- EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
- EMIT2(0xf7, 0xf1); /* div %ecx */
- EMIT2(0x89, 0xd0); /* mov %edx,%eax */
- break;
- case BPF_S_ALU_DIV_K: /* A /= K */
- if (K == 1)
- break;
- EMIT2(0x31, 0xd2); /* xor %edx,%edx */
- EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */
- EMIT2(0xf7, 0xf1); /* div %ecx */
- break;
- case BPF_S_ALU_AND_X:
- seen |= SEEN_XREG;
- EMIT2(0x21, 0xd8); /* and %ebx,%eax */
- break;
- case BPF_S_ALU_AND_K:
- if (K >= 0xFFFFFF00) {
- EMIT2(0x24, K & 0xFF); /* and imm8,%al */
- } else if (K >= 0xFFFF0000) {
- EMIT2(0x66, 0x25); /* and imm16,%ax */
- EMIT(K, 2);
- } else {
- EMIT1_off32(0x25, K); /* and imm32,%eax */
- }
- break;
- case BPF_S_ALU_OR_X:
- seen |= SEEN_XREG;
- EMIT2(0x09, 0xd8); /* or %ebx,%eax */
- break;
- case BPF_S_ALU_OR_K:
- if (is_imm8(K))
- EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
- else
- EMIT1_off32(0x0d, K); /* or imm32,%eax */
- break;
- case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
- case BPF_S_ALU_XOR_X:
- seen |= SEEN_XREG;
- EMIT2(0x31, 0xd8); /* xor %ebx,%eax */
- break;
- case BPF_S_ALU_XOR_K: /* A ^= K; */
- if (K == 0)
- break;
- if (is_imm8(K))
- EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */
- else
- EMIT1_off32(0x35, K); /* xor imm32,%eax */
- break;
- case BPF_S_ALU_LSH_X: /* A <<= X; */
- seen |= SEEN_XREG;
- EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */
- break;
- case BPF_S_ALU_LSH_K:
- if (K == 0)
- break;
- else if (K == 1)
- EMIT2(0xd1, 0xe0); /* shl %eax */
- else
- EMIT3(0xc1, 0xe0, K);
- break;
- case BPF_S_ALU_RSH_X: /* A >>= X; */
- seen |= SEEN_XREG;
- EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */
- break;
- case BPF_S_ALU_RSH_K: /* A >>= K; */
- if (K == 0)
- break;
- else if (K == 1)
- EMIT2(0xd1, 0xe8); /* shr %eax */
- else
- EMIT3(0xc1, 0xe8, K);
- break;
- case BPF_S_ALU_NEG:
- EMIT2(0xf7, 0xd8); /* neg %eax */
- break;
- case BPF_S_RET_K:
- if (!K) {
- if (pc_ret0 == -1)
- pc_ret0 = i;
- CLEAR_A();
- } else {
- EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
- }
- /* fallinto */
- case BPF_S_RET_A:
- if (seen_or_pass0) {
- if (i != flen - 1) {
- EMIT_JMP(cleanup_addr - addrs[i]);
- break;
- }
- if (seen_or_pass0 & SEEN_XREG)
- EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
- EMIT1(0xc9); /* leaveq */
- }
- EMIT1(0xc3); /* ret */
- break;
- case BPF_S_MISC_TAX: /* X = A */
- seen |= SEEN_XREG;
- EMIT2(0x89, 0xc3); /* mov %eax,%ebx */
- break;
- case BPF_S_MISC_TXA: /* A = X */
- seen |= SEEN_XREG;
- EMIT2(0x89, 0xd8); /* mov %ebx,%eax */
- break;
- case BPF_S_LD_IMM: /* A = K */
- if (!K)
- CLEAR_A();
- else
- EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
- break;
- case BPF_S_LDX_IMM: /* X = K */
- seen |= SEEN_XREG;
- if (!K)
- CLEAR_X();
+ /* shifts */
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ EMIT1(add_1mod(0x48, a_reg));
+ else if (is_ereg(a_reg))
+ EMIT1(add_1mod(0x40, a_reg));
+
+ switch (BPF_OP(insn->code)) {
+ case BPF_LSH: b3 = 0xE0; break;
+ case BPF_RSH: b3 = 0xE8; break;
+ case BPF_ARSH: b3 = 0xF8; break;
+ }
+ EMIT3(0xC1, add_1reg(b3, a_reg), K);
+ break;
+
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ switch (K) {
+ case 16:
+ /* emit 'ror %ax, 8' to swap lower 2 bytes */
+ EMIT1(0x66);
+ if (is_ereg(a_reg))
+ EMIT1(0x41);
+ EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
+ break;
+ case 32:
+ /* emit 'bswap eax' to swap lower 4 bytes */
+ if (is_ereg(a_reg))
+ EMIT2(0x41, 0x0F);
else
- EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
- break;
- case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
- seen |= SEEN_MEM;
- EMIT3(0x8b, 0x45, 0xf0 - K*4);
- break;
- case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
- seen |= SEEN_XREG | SEEN_MEM;
- EMIT3(0x8b, 0x5d, 0xf0 - K*4);
- break;
- case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
- seen |= SEEN_MEM;
- EMIT3(0x89, 0x45, 0xf0 - K*4);
- break;
- case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
- seen |= SEEN_XREG | SEEN_MEM;
- EMIT3(0x89, 0x5d, 0xf0 - K*4);
- break;
- case BPF_S_LD_W_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- if (is_imm8(offsetof(struct sk_buff, len)))
- /* mov off8(%rdi),%eax */
- EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
- else {
- EMIT2(0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, len), 4);
- }
- break;
- case BPF_S_LDX_W_LEN: /* X = skb->len; */
- seen |= SEEN_XREG;
- if (is_imm8(offsetof(struct sk_buff, len)))
- /* mov off8(%rdi),%ebx */
- EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
- else {
- EMIT2(0x8b, 0x9f);
- EMIT(offsetof(struct sk_buff, len), 4);
- }
- break;
- case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- if (is_imm8(offsetof(struct sk_buff, protocol))) {
- /* movzwl off8(%rdi),%eax */
- EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
- } else {
- EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
- EMIT(offsetof(struct sk_buff, protocol), 4);
- }
- EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */
- break;
- case BPF_S_ANC_IFINDEX:
- if (is_imm8(offsetof(struct sk_buff, dev))) {
- /* movq off8(%rdi),%rax */
- EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
- } else {
- EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
- EMIT(offsetof(struct sk_buff, dev), 4);
- }
- EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */
- EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */
- EMIT(offsetof(struct net_device, ifindex), 4);
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, a_reg));
break;
- case BPF_S_ANC_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- if (is_imm8(offsetof(struct sk_buff, mark))) {
- /* mov off8(%rdi),%eax */
- EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
- } else {
- EMIT2(0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, mark), 4);
- }
- break;
- case BPF_S_ANC_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- if (is_imm8(offsetof(struct sk_buff, hash))) {
- /* mov off8(%rdi),%eax */
- EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
- } else {
- EMIT2(0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, hash), 4);
- }
- break;
- case BPF_S_ANC_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
- if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
- /* movzwl off8(%rdi),%eax */
- EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
- } else {
- EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
- EMIT(offsetof(struct sk_buff, queue_mapping), 4);
- }
- break;
- case BPF_S_ANC_CPU:
-#ifdef CONFIG_SMP
- EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
- EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
-#else
- CLEAR_A();
-#endif
- break;
- case BPF_S_ANC_VLAN_TAG:
- case BPF_S_ANC_VLAN_TAG_PRESENT:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
- /* movzwl off8(%rdi),%eax */
- EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
- } else {
- EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
- EMIT(offsetof(struct sk_buff, vlan_tci), 4);
- }
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
- if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
- EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */
- } else {
- EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */
- EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
- }
- break;
- case BPF_S_ANC_PKTTYPE:
- {
- int off = pkt_type_offset();
-
- if (off < 0)
- goto out;
- if (is_imm8(off)) {
- /* movzbl off8(%rdi),%eax */
- EMIT4(0x0f, 0xb6, 0x47, off);
- } else {
- /* movbl off32(%rdi),%eax */
- EMIT3(0x0f, 0xb6, 0x87);
- EMIT(off, 4);
- }
- EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
+ case 64:
+ /* emit 'bswap rax' to swap 8 bytes */
+ EMIT3(add_1mod(0x48, a_reg), 0x0F,
+ add_1reg(0xC8, a_reg));
break;
}
- case BPF_S_LD_W_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-common_load: seen |= SEEN_DATAREF;
- t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
- EMIT1_off32(0xe8, t_offset); /* call */
- break;
- case BPF_S_LD_H_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_half);
- goto common_load;
- case BPF_S_LD_B_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
- goto common_load;
- case BPF_S_LDX_B_MSH:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
- seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
- EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
- break;
- case BPF_S_LD_W_IND:
- func = sk_load_word;
-common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = func - (image + addrs[i]);
- if (K) {
- if (is_imm8(K)) {
- EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
- } else {
- EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
- EMIT(K, 4);
- }
- } else {
- EMIT2(0x89,0xde); /* mov %ebx,%esi */
- }
- EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
- break;
- case BPF_S_LD_H_IND:
- func = sk_load_half;
- goto common_load_ind;
- case BPF_S_LD_B_IND:
- func = sk_load_byte;
- goto common_load_ind;
- case BPF_S_JMP_JA:
- t_offset = addrs[i + K] - addrs[i];
- EMIT_JMP(t_offset);
- break;
- COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
- COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
- COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
- COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
- COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
- COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
- COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
- COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
-
-cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
- t_offset = addrs[i + filter[i].jt] - addrs[i];
-
- /* same targets, can avoid doing the test :) */
- if (filter[i].jt == filter[i].jf) {
- EMIT_JMP(t_offset);
- break;
- }
+ break;
+
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ break;
- switch (filter[i].code) {
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JEQ_X:
- seen |= SEEN_XREG;
- EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
- break;
- case BPF_S_JMP_JSET_X:
- seen |= SEEN_XREG;
- EMIT2(0x85, 0xd8); /* test %ebx,%eax */
- break;
- case BPF_S_JMP_JEQ_K:
- if (K == 0) {
- EMIT2(0x85, 0xc0); /* test %eax,%eax */
- break;
- }
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGE_K:
- if (K <= 127)
- EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+ /* ST: *(u8*)(a_reg + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_B:
+ if (is_ereg(a_reg))
+ EMIT2(0x41, 0xC6);
+ else
+ EMIT1(0xC6);
+ goto st;
+ case BPF_ST | BPF_MEM | BPF_H:
+ if (is_ereg(a_reg))
+ EMIT3(0x66, 0x41, 0xC7);
+ else
+ EMIT2(0x66, 0xC7);
+ goto st;
+ case BPF_ST | BPF_MEM | BPF_W:
+ if (is_ereg(a_reg))
+ EMIT2(0x41, 0xC7);
+ else
+ EMIT1(0xC7);
+ goto st;
+ case BPF_ST | BPF_MEM | BPF_DW:
+ EMIT2(add_1mod(0x48, a_reg), 0xC7);
+
+st: if (is_imm8(insn->off))
+ EMIT2(add_1reg(0x40, a_reg), insn->off);
+ else
+ EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
+
+ EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+ break;
+
+ /* STX: *(u8*)(a_reg + off) = x_reg */
+ case BPF_STX | BPF_MEM | BPF_B:
+ /* emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(a_reg) || is_ereg(x_reg) ||
+ /* have to add extra byte for x86 SIL, DIL regs */
+ x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
+ else
+ EMIT1(0x88);
+ goto stx;
+ case BPF_STX | BPF_MEM | BPF_H:
+ if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ goto stx;
+ case BPF_STX | BPF_MEM | BPF_W:
+ if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
+ else
+ EMIT1(0x89);
+ goto stx;
+ case BPF_STX | BPF_MEM | BPF_DW:
+ EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
+stx: if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+ insn->off);
+ break;
+
+ /* LDX: a_reg = *(u8*)(x_reg + off) */
+ case BPF_LDX | BPF_MEM | BPF_B:
+ /* emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
+ goto ldx;
+ case BPF_LDX | BPF_MEM | BPF_H:
+ /* emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
+ goto ldx;
+ case BPF_LDX | BPF_MEM | BPF_W:
+ /* emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ goto ldx;
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ /* emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
+ldx: /* if insn->off == 0 we can save one extra byte, but
+ * special case of x86 r13 which always needs an offset
+ * is not worth the hassle
+ */
+ if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
+ insn->off);
+ break;
+
+ /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* emit 'lock add dword ptr [rax + off], eax' */
+ if (is_ereg(a_reg) || is_ereg(x_reg))
+ EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
+ else
+ EMIT2(0xF0, 0x01);
+ goto xadd;
+ case BPF_STX | BPF_XADD | BPF_DW:
+ EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
+xadd: if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+ insn->off);
+ break;
+
+ /* call */
+ case BPF_JMP | BPF_CALL:
+ func = (u8 *) __bpf_call_base + K;
+ jmp_offset = func - (image + addrs[i]);
+ if (ctx->seen_ld_abs) {
+ EMIT2(0x41, 0x52); /* push %r10 */
+ EMIT2(0x41, 0x51); /* push %r9 */
+ /* need to adjust jmp offset, since
+ * pop %r9, pop %r10 take 4 bytes after call insn
+ */
+ jmp_offset += 4;
+ }
+ if (!K || !is_simm32(jmp_offset)) {
+ pr_err("unsupported bpf func %d addr %p image %p\n",
+ K, func, image);
+ return -EINVAL;
+ }
+ EMIT1_off32(0xE8, jmp_offset);
+ if (ctx->seen_ld_abs) {
+ EMIT2(0x41, 0x59); /* pop %r9 */
+ EMIT2(0x41, 0x5A); /* pop %r10 */
+ }
+ break;
+
+ /* cond jump */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ /* cmp a_reg, x_reg */
+ EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
+ add_2reg(0xC0, a_reg, x_reg));
+ goto emit_cond_jmp;
+
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* test a_reg, x_reg */
+ EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
+ add_2reg(0xC0, a_reg, x_reg));
+ goto emit_cond_jmp;
+
+ case BPF_JMP | BPF_JSET | BPF_K:
+ /* test a_reg, imm32 */
+ EMIT1(add_1mod(0x48, a_reg));
+ EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
+ goto emit_cond_jmp;
+
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ /* cmp a_reg, imm8/32 */
+ EMIT1(add_1mod(0x48, a_reg));
+
+ if (is_imm8(K))
+ EMIT3(0x83, add_1reg(0xF8, a_reg), K);
+ else
+ EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
+
+emit_cond_jmp: /* convert BPF opcode to x86 */
+ switch (BPF_OP(insn->code)) {
+ case BPF_JEQ:
+ jmp_cond = X86_JE;
+ break;
+ case BPF_JSET:
+ case BPF_JNE:
+ jmp_cond = X86_JNE;
+ break;
+ case BPF_JGT:
+ /* GT is unsigned '>', JA in x86 */
+ jmp_cond = X86_JA;
+ break;
+ case BPF_JGE:
+ /* GE is unsigned '>=', JAE in x86 */
+ jmp_cond = X86_JAE;
+ break;
+ case BPF_JSGT:
+ /* signed '>', GT in x86 */
+ jmp_cond = X86_JG;
+ break;
+ case BPF_JSGE:
+ /* signed '>=', GE in x86 */
+ jmp_cond = X86_JGE;
+ break;
+ default: /* to silence gcc warning */
+ return -EFAULT;
+ }
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (is_imm8(jmp_offset)) {
+ EMIT2(jmp_cond, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+ } else {
+ pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+
+ break;
+
+ case BPF_JMP | BPF_JA:
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (!jmp_offset)
+ /* optimize out nop jumps */
+ break;
+emit_jmp:
+ if (is_imm8(jmp_offset)) {
+ EMIT2(0xEB, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT1_off32(0xE9, jmp_offset);
+ } else {
+ pr_err("jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+ break;
+
+ case BPF_LD | BPF_IND | BPF_W:
+ func = sk_load_word;
+ goto common_load;
+ case BPF_LD | BPF_ABS | BPF_W:
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
+common_load: ctx->seen_ld_abs = true;
+ jmp_offset = func - (image + addrs[i]);
+ if (!func || !is_simm32(jmp_offset)) {
+ pr_err("unsupported bpf func %d addr %p image %p\n",
+ K, func, image);
+ return -EINVAL;
+ }
+ if (BPF_MODE(insn->code) == BPF_ABS) {
+ /* mov %esi, imm32 */
+ EMIT1_off32(0xBE, K);
+ } else {
+ /* mov %rsi, x_reg */
+ EMIT_mov(BPF_REG_2, x_reg);
+ if (K) {
+ if (is_imm8(K))
+ /* add %esi, imm8 */
+ EMIT3(0x83, 0xC6, K);
else
- EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
- break;
- case BPF_S_JMP_JSET_K:
- if (K <= 0xFF)
- EMIT2(0xa8, K); /* test imm8,%al */
- else if (!(K & 0xFFFF00FF))
- EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
- else if (K <= 0xFFFF) {
- EMIT2(0x66, 0xa9); /* test imm16,%ax */
- EMIT(K, 2);
- } else {
- EMIT1_off32(0xa9, K); /* test imm32,%eax */
- }
- break;
+ /* add %esi, imm32 */
+ EMIT2_off32(0x81, 0xC6, K);
}
- if (filter[i].jt != 0) {
- if (filter[i].jf && f_offset)
- t_offset += is_near(f_offset) ? 2 : 5;
- EMIT_COND_JMP(t_op, t_offset);
- if (filter[i].jf)
- EMIT_JMP(f_offset);
- break;
- }
- EMIT_COND_JMP(f_op, f_offset);
- break;
- default:
- /* hmm, too complex filter, give up with jit compiler */
- goto out;
}
- ilen = prog - temp;
- if (image) {
- if (unlikely(proglen + ilen > oldproglen)) {
- pr_err("bpb_jit_compile fatal error\n");
- kfree(addrs);
- module_free(NULL, header);
- return;
- }
- memcpy(image + proglen, temp, ilen);
+ /* skb pointer is in R6 (%rbx), it will be copied into
+ * %rdi if skb_copy_bits() call is necessary.
+ * sk_load_* helpers also use %r10 and %r9d.
+ * See bpf_jit.S
+ */
+ EMIT1_off32(0xE8, jmp_offset); /* call */
+ break;
+
+ case BPF_LD | BPF_IND | BPF_H:
+ func = sk_load_half;
+ goto common_load;
+ case BPF_LD | BPF_ABS | BPF_H:
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
+ goto common_load;
+ case BPF_LD | BPF_IND | BPF_B:
+ func = sk_load_byte;
+ goto common_load;
+ case BPF_LD | BPF_ABS | BPF_B:
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
+ goto common_load;
+
+ case BPF_JMP | BPF_EXIT:
+ if (i != insn_cnt - 1) {
+ jmp_offset = ctx->cleanup_addr - addrs[i];
+ goto emit_jmp;
}
- proglen += ilen;
- addrs[i] = proglen;
- prog = temp;
+ /* update cleanup_addr */
+ ctx->cleanup_addr = proglen;
+ /* mov rbx, qword ptr [rbp-X] */
+ EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
+ /* mov r13, qword ptr [rbp-X] */
+ EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
+ /* mov r14, qword ptr [rbp-X] */
+ EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
+ /* mov r15, qword ptr [rbp-X] */
+ EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
+
+ EMIT1(0xC9); /* leave */
+ EMIT1(0xC3); /* ret */
+ break;
+
+ default:
+ /* By design x64 JIT should support all BPF instructions
+ * This error will be seen if new instruction was added
+ * to interpreter, but not to JIT
+ * or if there is junk in sk_filter
+ */
+ pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
+ return -EINVAL;
}
- /* last bpf instruction is always a RET :
- * use it to give the cleanup instruction(s) addr
- */
- cleanup_addr = proglen - 1; /* ret */
- if (seen_or_pass0)
- cleanup_addr -= 1; /* leaveq */
- if (seen_or_pass0 & SEEN_XREG)
- cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
+ ilen = prog - temp;
+ if (image) {
+ if (unlikely(proglen + ilen > oldproglen)) {
+ pr_err("bpf_jit_compile fatal error\n");
+ return -EFAULT;
+ }
+ memcpy(image + proglen, temp, ilen);
+ }
+ proglen += ilen;
+ addrs[i] = proglen;
+ prog = temp;
+ }
+ return proglen;
+}
+
+void bpf_jit_compile(struct sk_filter *prog)
+{
+}
+
+void bpf_int_jit_compile(struct sk_filter *prog)
+{
+ struct bpf_binary_header *header = NULL;
+ int proglen, oldproglen = 0;
+ struct jit_context ctx = {};
+ u8 *image = NULL;
+ int *addrs;
+ int pass;
+ int i;
+
+ if (!bpf_jit_enable)
+ return;
+
+ if (!prog || !prog->len)
+ return;
+
+ addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+ if (!addrs)
+ return;
+
+ /* Before first pass, make a rough estimation of addrs[]
+ * each bpf instruction is translated to less than 64 bytes
+ */
+ for (proglen = 0, i = 0; i < prog->len; i++) {
+ proglen += 64;
+ addrs[i] = proglen;
+ }
+ ctx.cleanup_addr = proglen;
+
+ for (pass = 0; pass < 10; pass++) {
+ proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+ if (proglen <= 0) {
+ image = NULL;
+ if (header)
+ module_free(NULL, header);
+ goto out;
+ }
if (image) {
if (proglen != oldproglen)
- pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
+ pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
+ proglen, oldproglen);
break;
}
if (proglen == oldproglen) {
@@ -766,17 +918,16 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
}
if (bpf_jit_enable > 1)
- bpf_jit_dump(flen, proglen, pass, image);
+ bpf_jit_dump(prog->len, proglen, 0, image);
if (image) {
bpf_flush_icache(header, image + proglen);
set_memory_ro((unsigned long)header, header->pages);
- fp->bpf_func = (void *)image;
- fp->jited = 1;
+ prog->bpf_func = (void *)image;
+ prog->jited = 1;
}
out:
kfree(addrs);
- return;
}
static void bpf_jit_free_deferred(struct work_struct *work)
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 81b506d5bef..52414211729 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -14,48 +14,92 @@
static const struct font_desc *font;
static u32 efi_x, efi_y;
+static void *efi_fb;
+static bool early_efi_keep;
-static __init void early_efi_clear_scanline(unsigned int y)
+/*
+ * efi earlyprintk need use early_ioremap to map the framebuffer.
+ * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should
+ * be used instead. ioremap will be available after paging_init() which is
+ * earlier than initcall callbacks. Thus adding this early initcall function
+ * early_efi_map_fb to map the whole efi framebuffer.
+ */
+static __init int early_efi_map_fb(void)
{
- unsigned long base, *dst;
- u16 len;
+ unsigned long base, size;
+
+ if (!early_efi_keep)
+ return 0;
base = boot_params.screen_info.lfb_base;
- len = boot_params.screen_info.lfb_linelength;
+ size = boot_params.screen_info.lfb_size;
+ efi_fb = ioremap(base, size);
+
+ return efi_fb ? 0 : -ENOMEM;
+}
+early_initcall(early_efi_map_fb);
+
+/*
+ * early_efi_map maps efi framebuffer region [start, start + len -1]
+ * In case earlyprintk=efi,keep we have the whole framebuffer mapped already
+ * so just return the offset efi_fb + start.
+ */
+static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
+{
+ unsigned long base;
+
+ base = boot_params.screen_info.lfb_base;
+
+ if (efi_fb)
+ return (efi_fb + start);
+ else
+ return early_ioremap(base + start, len);
+}
- dst = early_ioremap(base + y*len, len);
+static __init_refok void early_efi_unmap(void *addr, unsigned long len)
+{
+ if (!efi_fb)
+ early_iounmap(addr, len);
+}
+
+static void early_efi_clear_scanline(unsigned int y)
+{
+ unsigned long *dst;
+ u16 len;
+
+ len = boot_params.screen_info.lfb_linelength;
+ dst = early_efi_map(y*len, len);
if (!dst)
return;
memset(dst, 0, len);
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
}
-static __init void early_efi_scroll_up(void)
+static void early_efi_scroll_up(void)
{
- unsigned long base, *dst, *src;
+ unsigned long *dst, *src;
u16 len;
u32 i, height;
- base = boot_params.screen_info.lfb_base;
len = boot_params.screen_info.lfb_linelength;
height = boot_params.screen_info.lfb_height;
for (i = 0; i < height - font->height; i++) {
- dst = early_ioremap(base + i*len, len);
+ dst = early_efi_map(i*len, len);
if (!dst)
return;
- src = early_ioremap(base + (i + font->height) * len, len);
+ src = early_efi_map((i + font->height) * len, len);
if (!src) {
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
return;
}
memmove(dst, src, len);
- early_iounmap(src, len);
- early_iounmap(dst, len);
+ early_efi_unmap(src, len);
+ early_efi_unmap(dst, len);
}
}
@@ -79,16 +123,14 @@ static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h)
}
}
-static __init void
+static void
early_efi_write(struct console *con, const char *str, unsigned int num)
{
struct screen_info *si;
- unsigned long base;
unsigned int len;
const char *s;
void *dst;
- base = boot_params.screen_info.lfb_base;
si = &boot_params.screen_info;
len = si->lfb_linelength;
@@ -109,7 +151,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num)
for (h = 0; h < font->height; h++) {
unsigned int n, x;
- dst = early_ioremap(base + (efi_y + h) * len, len);
+ dst = early_efi_map((efi_y + h) * len, len);
if (!dst)
return;
@@ -123,7 +165,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num)
s++;
}
- early_iounmap(dst, len);
+ early_efi_unmap(dst, len);
}
num -= count;
@@ -179,6 +221,9 @@ static __init int early_efi_setup(struct console *con, char *options)
for (i = 0; i < (yres - efi_y) / font->height; i++)
early_efi_scroll_up();
+ /* early_console_register will unset CON_BOOT in case ,keep */
+ if (!(con->flags & CON_BOOT))
+ early_efi_keep = true;
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index ff0174dda81..a9acde72d4e 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -75,7 +75,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
return 0;
}
-asmlinkage int xo1_do_sleep(u8 sleep_state)
+asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
void *pgd_addr = __va(read_cr3());
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 304fca20d96..35e2bb6c0f3 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -23,7 +23,7 @@
extern __visible const void __nosave_begin, __nosave_end;
/* Defined in hibernate_asm_64.S */
-extern asmlinkage int restore_image(void);
+extern asmlinkage __visible int restore_image(void);
/*
* Address to jump to in the last phase of restore in order to get to the image
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index f325af26107..3323c274524 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -54,5 +54,7 @@ syshdr-$(CONFIG_X86_64) += syscalls_64.h
targets += $(uapisyshdr-y) $(syshdr-y)
+PHONY += all
all: $(addprefix $(uapi)/,$(uapisyshdr-y))
all: $(addprefix $(out)/,$(syshdr-y))
+ @:
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 96bc506ac6d..d6b86792161 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -359,3 +359,4 @@
350 i386 finit_module sys_finit_module
351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index e8120346903..604a37efd4d 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -40,4 +40,6 @@ $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/ina
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-y += relocs
relocs-objs := relocs_32.o relocs_64.o relocs_common.o
+PHONY += relocs
relocs: $(obj)/relocs
+ @:
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 2e263f367b1..9df017ab228 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -9,12 +9,9 @@ SECTIONS
#ifdef BUILD_VDSO32
#include <asm/vdso32.h>
- .hpet_sect : {
- hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
- } :text :hpet_sect
+ hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
- .vvar_sect : {
- vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
+ vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
@@ -22,7 +19,6 @@ SECTIONS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
- } :text :vvar_sect
#endif
. = SIZEOF_HEADERS;
@@ -61,7 +57,12 @@ SECTIONS
*/
. = ALIGN(0x100);
- .text : { *(.text*) } :text =0x90909090
+ .text : { *(.text*) } :text =0x90909090,
+
+ /*
+ * The comma above works around a bug in gold:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=16804
+ */
/DISCARD/ : {
*(.discard)
@@ -84,8 +85,4 @@ PHDRS
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
-#ifdef BUILD_VDSO32
- vvar_sect PT_NULL FLAGS(4); /* PF_R */
- hpet_sect PT_NULL FLAGS(4); /* PF_R */
-#endif
}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 00348980a3a..e1f220e3ca6 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -39,6 +39,7 @@
#ifdef CONFIG_X86_64
#define vdso_enabled sysctl_vsyscall32
#define arch_setup_additional_pages syscall32_setup_pages
+extern int sysctl_ldt16;
#endif
/*
@@ -249,6 +250,13 @@ static struct ctl_table abi_table2[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ldt16",
+ .data = &sysctl_ldt16,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{}
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 201d09a7c46..c34bfc4bbe7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1515,7 +1515,7 @@ static void __init xen_pvh_early_guest_init(void)
}
/* First C function to be called on Xen boot */
-asmlinkage void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
int rc;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 08f763de26f..a1207cb6472 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -23,7 +23,7 @@ void xen_force_evtchn_callback(void)
(void)HYPERVISOR_xen_version(0, NULL);
}
-asmlinkage unsigned long xen_save_fl(void)
+asmlinkage __visible unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
@@ -63,7 +63,7 @@ __visible void xen_restore_fl(unsigned long flags)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
-asmlinkage void xen_irq_disable(void)
+asmlinkage __visible void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
@@ -74,7 +74,7 @@ asmlinkage void xen_irq_disable(void)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
-asmlinkage void xen_irq_enable(void)
+asmlinkage __visible void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a18eadd8bb4..7005974c3ff 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -441,10 +441,11 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
irq_ctx_init(cpu);
#else
clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
-#endif
+
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_init_lock_cpu(cpu);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 4d3acc34a99..0ba5f3b967f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -274,7 +274,7 @@ void __init xen_init_spinlocks(void)
printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
return;
}
-
+ printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
}
@@ -290,6 +290,9 @@ static __init int xen_init_spinlocks_jump(void)
if (!xen_pvspin)
return 0;
+ if (!xen_domain())
+ return 0;
+
static_key_slow_inc(&paravirt_ticketlocks_enabled);
return 0;
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 33ca6e42a4c..fd92a64d748 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -75,6 +75,17 @@ ENDPROC(xen_sysexit)
* stack state in whatever form its in, we keep things simple by only
* using a single register which is pushed/popped on the stack.
*/
+
+.macro POP_FS
+1:
+ popw %fs
+.pushsection .fixup, "ax"
+2: movw $0, (%esp)
+ jmp 1b
+.popsection
+ _ASM_EXTABLE(1b,2b)
+.endm
+
ENTRY(xen_iret)
/* test eflags for special cases */
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
@@ -83,15 +94,13 @@ ENTRY(xen_iret)
push %eax
ESP_OFFSET=4 # bytes pushed onto stack
- /*
- * Store vcpu_info pointer for easy access. Do it this way to
- * avoid having to reload %fs
- */
+ /* Store vcpu_info pointer for easy access */
#ifdef CONFIG_SMP
- GET_THREAD_INFO(%eax)
- movl %ss:TI_cpu(%eax), %eax
- movl %ss:__per_cpu_offset(,%eax,4), %eax
- mov %ss:xen_vcpu(%eax), %eax
+ pushw %fs
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax, %fs
+ movl %fs:xen_vcpu, %eax
+ POP_FS
#else
movl %ss:xen_vcpu, %eax
#endif