From b960d6c43a63ebd2d8518b328da3816b833ee8cc Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 27 Mar 2012 14:52:44 +0100 Subject: xen/p2m: m2p_find_override: use list_for_each_entry_safe Use list_for_each_entry_safe and remove the spin_lock acquisition in m2p_find_override. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 1b267e75158..7ed8cc3434c 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,21 +809,17 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; + struct page *p, *t, *ret; ret = NULL; - spin_lock_irqsave(&m2p_override_lock, flags); - - list_for_each_entry(p, bucket, lru) { + list_for_each_entry_safe(p, t, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } - spin_unlock_irqrestore(&m2p_override_lock, flags); - return ret; } -- cgit v1.2.3-70-g09d2 From 3d81acb1cdb242378a1acb3eb1bc28c6bb5895f1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 20 Apr 2012 11:50:30 -0400 Subject: Revert "xen/p2m: m2p_find_override: use list_for_each_entry_safe" This reverts commit b960d6c43a63ebd2d8518b328da3816b833ee8cc. If we have another thread (very likely) touched the list, we end up hitting a problem "that the next element is wrong because we should be able to cope with that. The problem is that the next->next pointer would be set LIST_POISON1. " (Stefano's comment on the patch). Reverting for now. Suggested-by: Dan Carpenter Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 7ed8cc3434c..1b267e75158 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,17 +809,21 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *t, *ret; + struct page *p, *ret; ret = NULL; - list_for_each_entry_safe(p, t, bucket, lru) { + spin_lock_irqsave(&m2p_override_lock, flags); + + list_for_each_entry(p, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } + spin_unlock_irqrestore(&m2p_override_lock, flags); + return ret; } -- cgit v1.2.3-70-g09d2 From df88b2d96e36d9a9e325bfcd12eb45671cbbc937 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Apr 2012 13:13:21 -0400 Subject: xen/enlighten: Disable MWAIT_LEAF so that acpi-pad won't be loaded. There are exactly four users of __monitor and __mwait: - cstate.c (which allows acpi_processor_ffh_cstate_enter to be called when the cpuidle API drivers are used. However patch "cpuidle: replace xen access to x86 pm_idle and default_idle" provides a mechanism to disable the cpuidle and use safe_halt. - smpboot (which allows mwait_play_dead to be called). However safe_halt is always used so we skip that. - intel_idle (same deal as above). - acpi_pad.c. This the one that we do not want to run as we will hit the below crash. Why do we want to expose MWAIT_LEAF in the first place? We want it for the xen-acpi-processor driver - which uploads C-states to the hypervisor. If MWAIT_LEAF is set, the cstate.c sets the proper address in the C-states so that the hypervisor can benefit from using the MWAIT functionality. And that is the sole reason for using it. Without this patch, if a module performs mwait or monitor we get this: invalid opcode: 0000 [#1] SMP CPU 2 .. snip.. Pid: 5036, comm: insmod Tainted: G O 3.4.0-rc2upstream-dirty #2 Intel Corporation S2600CP/S2600CP RIP: e030:[] [] mwait_check_init+0x17/0x1000 [mwait_check] RSP: e02b:ffff8801c298bf18 EFLAGS: 00010282 RAX: ffff8801c298a010 RBX: ffffffffa03b2000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8801c29800d8 RDI: ffff8801ff097200 RBP: ffff8801c298bf18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffffffffa000a000 R14: 0000005148db7294 R15: 0000000000000003 FS: 00007fbb364f2700(0000) GS:ffff8801ff08c000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000179f038 CR3: 00000001c9469000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process insmod (pid: 5036, threadinfo ffff8801c298a000, task ffff8801c29cd7e0) Stack: ffff8801c298bf48 ffffffff81002124 ffffffffa03b2000 00000000000081fd 000000000178f010 000000000178f030 ffff8801c298bf78 ffffffff810c41e6 00007fff3fb30db9 00007fff3fb30db9 00000000000081fd 0000000000010000 Call Trace: [] do_one_initcall+0x124/0x170 [] sys_init_module+0xc6/0x220 [] system_call_fastpath+0x16/0x1b Code: <0f> 01 c8 31 c0 0f 01 c9 c9 c3 00 00 00 00 00 00 00 00 00 00 00 00 RIP [] mwait_check_init+0x17/0x1000 [mwait_check] RSP ---[ end trace 16582fc8a3d1e29a ]--- Kernel panic - not syncing: Fatal exception With this module (which is what acpi_pad.c would hit): MODULE_AUTHOR("Konrad Rzeszutek Wilk "); MODULE_DESCRIPTION("mwait_check_and_back"); MODULE_LICENSE("GPL"); MODULE_VERSION(); static int __init mwait_check_init(void) { __monitor((void *)¤t_thread_info()->flags, 0, 0); __mwait(0, 0); return 0; } static void __exit mwait_check_exit(void) { } module_init(mwait_check_init); module_exit(mwait_check_exit); Reported-by: Liu, Jinsong Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4f51bebac02..a8f8844b8d3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -261,7 +261,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#ifdef CONFIG_ACPI +#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ + !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -349,7 +350,6 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - if (xen_check_mwait()) cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } -- cgit v1.2.3-70-g09d2 From cf405ae612b0f7e2358db7ff594c0e94846137aa Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Apr 2012 13:50:03 -0400 Subject: xen/smp: Fix crash when booting with ACPI hotplug CPUs. When we boot on a machine that can hotplug CPUs and we are using 'dom0_max_vcpus=X' on the Xen hypervisor line to clip the amount of CPUs available to the initial domain, we get this: (XEN) Command line: com1=115200,8n1 dom0_mem=8G noreboot dom0_max_vcpus=8 sync_console mce_verbosity=verbose console=com1,vga loglvl=all guest_loglvl=all .. snip.. DMI: Intel Corporation S2600CP/S2600CP, BIOS SE5C600.86B.99.99.x032.072520111118 07/25/2011 .. snip. SMP: Allowing 64 CPUs, 32 hotplug CPUs installing Xen timer for CPU 7 cpu 7 spinlock event irq 361 NMI watchdog: disabled (cpu7): hardware events not enabled Brought up 8 CPUs .. snip.. [acpi processor finds the CPUs are not initialized and starts calling arch_register_cpu, which creates /sys/devices/system/cpu/cpu8/online] CPU 8 got hotplugged CPU 9 got hotplugged CPU 10 got hotplugged .. snip.. initcall 1_acpi_battery_init_async+0x0/0x1b returned 0 after 406 usecs calling erst_init+0x0/0x2bb @ 1 [and the scheduler sticks newly started tasks on the new CPUs, but said CPUs cannot be initialized b/c the hypervisor has limited the amount of vCPUS to 8 - as per the dom0_max_vcpus=8 flag. The spinlock tries to kick the other CPU, but the structure for that is not initialized and we crash.] BUG: unable to handle kernel paging request at fffffffffffffed8 IP: [] xen_spin_lock+0x29/0x60 PGD 180d067 PUD 180e067 PMD 0 Oops: 0002 [#1] SMP CPU 7 Modules linked in: Pid: 1, comm: swapper/0 Not tainted 3.4.0-rc2upstream-00001-gf5154e8 #1 Intel Corporation S2600CP/S2600CP RIP: e030:[] [] xen_spin_lock+0x29/0x60 RSP: e02b:ffff8801fb9b3a70 EFLAGS: 00010282 With this patch, we cap the amount of vCPUS that the initial domain can run, to exactly what dom0_max_vcpus=X has specified. In the future, if there is a hypercall that will allow a running domain to expand past its initial set of vCPUS, this patch should be re-evaluated. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 5fac6919b95..0503c0c493a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void) static void __init xen_filter_cpu_maps(void) { int i, rc; + unsigned int subtract = 0; if (!xen_initial_domain()) return; @@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void) } else { set_cpu_possible(i, false); set_cpu_present(i, false); + subtract++; } } +#ifdef CONFIG_HOTPLUG_CPU + /* This is akin to using 'nr_cpus' on the Linux command line. + * Which is OK as when we use 'dom0_max_vcpus=X' we can only + * have up to X, while nr_cpu_ids is greater than X. This + * normally is not a problem, except when CPU hotplugging + * is involved and then there might be more than X CPUs + * in the guest - which will not work as there is no + * hypercall to expand the max number of VCPUs an already + * running guest has. So cap it up to X. */ + if (subtract) + nr_cpu_ids = nr_cpu_ids - subtract; +#endif + } static void __init xen_smp_prepare_boot_cpu(void) -- cgit v1.2.3-70-g09d2 From 7eb7ce4d2e8991aff4ecb71a81949a907ca755ac Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 26 Apr 2012 19:44:06 +0100 Subject: xen: correctly check for pending events when restoring irq flags In xen_restore_fl_direct(), xen_force_evtchn_callback() was being called even if no events were pending. This resulted in (depending on workload) about a 100 times as many xen_version hypercalls as necessary. Fix this by correcting the sense of the conditional jump. This seems to give a significant performance benefit for some workloads. There is some subtle tricksy "..since the check here is trying to check both pending and masked in a single cmpw, but I think this is correct. It will call check_events now only when the combined mask+pending word is 0x0001 (aka unmasked, pending)." (Ian) CC: stable@kernel.org Acked-by: Ian Campbell Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 79d7362ad6d..3e45aa00071 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f + jnz 1f 2: call check_events 1: ENDPATCH(xen_restore_fl_direct) -- cgit v1.2.3-70-g09d2