From: Jan Beulich on
Use the (alternative instructions based) callout hooks to the ticket
spinlock code to enlighten ticket locks when running fully virtualized
on Xen. Ultimately, this code might also be a candidate to be used
when running para-virtualized.

Signed-off-by: Jan Beulich <jbeulich(a)novell.com>
Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge(a)citrix.com>
Cc: KY Srinivasan <ksrinivasan(a)novell.com>

---
arch/x86/include/asm/hypervisor.h | 1
arch/x86/include/asm/spinlock_types.h | 17 +-
arch/x86/include/asm/xen/cpuid.h | 68 ++++++++
arch/x86/kernel/cpu/Makefile | 2
arch/x86/kernel/cpu/hypervisor.c | 1
arch/x86/kernel/cpu/xen.c | 269 ++++++++++++++++++++++++++++++++++
6 files changed, 355 insertions(+), 3 deletions(-)

--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/hypervisor.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/hypervisor.h
@@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_
/* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen;

#endif
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock_types.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
# error "please don't include this file directly"
#endif

+#include <asm/types.h>
+
typedef struct arch_spinlock {
- unsigned int slock;
+ union {
+ unsigned int slock;
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+ struct {
+# if CONFIG_NR_CPUS < 256
+ u8 cur, seq;
+# else
+ u16 cur, seq;
+# endif
+ };
+#endif
+ };
} arch_spinlock_t;

-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }

typedef struct {
unsigned int lock;
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/xen/cpuid.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <keir.fraser(a)citrix.com>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to an including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/Makefile
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)

obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += vmware.o xen.o hypervisor.o sched.o mshyperv.o

obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/hypervisor.c
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/hypervisor.c
@@ -43,6 +43,7 @@ static const __initconst struct hypervis
{
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
+ &x86_hyper_xen,
};

const struct hypervisor_x86 *x86_hyper;
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/xen.c
@@ -0,0 +1,269 @@
+#define __XEN_INTERFACE_VERSION__ 0x00030207
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stringify.h>
+#include <asm/sync_bitops.h>
+#include <asm/hypervisor.h>
+#include <asm/xen/cpuid.h>
+#include <asm/xen/hypercall.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/vcpu.h>
+
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+struct spinning {
+ struct arch_spinlock *lock;
+ unsigned int ticket;
+ struct spinning *prev;
+};
+
+static struct shared_info *__read_mostly xen_shared_info;
+EXPORT_SYMBOL_GPL(xen_shared_info);
+
+static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
+static DEFINE_PER_CPU(struct spinning *, _spinning);
+/*
+ * Protect removal of objects: Insertion can be done lockless, and even
+ * removal itself doesn't need protection - what needs to be prevented is
+ * removed objects going out of scope (as they're living on the stack).
+ */
+static DEFINE_PER_CPU(arch_rwlock_t, spinning_rm_lock) = __ARCH_RW_LOCK_UNLOCKED;
+
+static unsigned int __read_mostly spin_count = 1000;
+static int __init setup_spin_count(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ spin_count = simple_strtoul(s, &s, 0);
+ return !*s ? 0 : -EINVAL;
+}
+early_param("spin_count", setup_spin_count);
+
+#ifndef CONFIG_XEN
+__asm__(".pushsection .text, \"ax\", @progbits\n"
+ ".p2align " __stringify(PAGE_SHIFT) "\n"
+ "hypercall_page:\n"
+ ".skip 1 << " __stringify(PAGE_SHIFT) "\n"
+ ".popsection");
+#endif
+
+static void xen_set_cpu_features(struct cpuinfo_x86 *);
+
+static void xen_spin_lock(struct arch_spinlock *lock, unsigned int token)
+{
+ arch_rwlock_t *rm_lock;
+ unsigned long flags;
+ unsigned int count;
+ struct spinning spinning;
+
+ if (unlikely(percpu_read(runstate.state) != RUNSTATE_running))
+ xen_set_cpu_features(&__get_cpu_var(cpu_info));
+
+#if TICKET_SHIFT == 8
+ token >>= TICKET_SHIFT;
+#endif
+ spinning.ticket = token;
+ spinning.lock = lock;
+ spinning.prev = percpu_read(_spinning);
+ smp_wmb();
+ percpu_write(_spinning, &spinning);
+
+ sync_clear_bit(percpu_read(poll_evtchn),
+ xen_shared_info->evtchn_pending);
+
+ for (count = spin_count; ({ barrier(); lock->cur != token; }); )
+ if (likely(cpu_online(raw_smp_processor_id()))
+ && unlikely(!--count)) {
+ struct sched_poll sched_poll;
+
+ set_xen_guest_handle(sched_poll.ports,
+ &__get_cpu_var(poll_evtchn));
+ sched_poll.nr_ports = 1;
+ sched_poll.timeout = 0;
+ HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll);
+ count = spin_count;
+ } else
+ cpu_relax();
+
+ /*
+ * If we interrupted another spinlock while it was blocking, make
+ * sure it doesn't block (again) without re-checking the lock.
+ */
+ if (spinning.prev)
+ sync_set_bit(percpu_read(poll_evtchn),
+ xen_shared_info->evtchn_pending);
+
+ percpu_write(_spinning, spinning.prev);
+ rm_lock = &__get_cpu_var(spinning_rm_lock);
+ raw_local_irq_save(flags);
+ arch_write_lock(rm_lock);
+ arch_write_unlock(rm_lock);
+ raw_local_irq_restore(flags);
+}
+
+static void xen_spin_unlock(struct arch_spinlock *lock, unsigned int token)
+{
+ unsigned int cpu;
+
+ token &= (1U << TICKET_SHIFT) - 1;
+ for_each_online_cpu(cpu) {
+ arch_rwlock_t *rm_lock;
+ unsigned long flags;
+ struct spinning *spinning;
+
+ if (cpu == raw_smp_processor_id())
+ continue;
+
+ rm_lock = &per_cpu(spinning_rm_lock, cpu);
+ raw_local_irq_save(flags);
+ arch_read_lock(rm_lock);
+
+ spinning = per_cpu(_spinning, cpu);
+ smp_rmb();
+ if (spinning
+ && (spinning->lock != lock || spinning->ticket != token))
+ spinning = NULL;
+
+ arch_read_unlock(rm_lock);
+ raw_local_irq_restore(flags);
+
+ if (unlikely(spinning)) {
+ struct evtchn_send send;
+
+ send.port = per_cpu(poll_evtchn, cpu);
+ HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+ return;
+ }
+ }
+}
+
+static void __init _prepare_shared_info_page(void)
+{
+ struct xen_add_to_physmap xatp;
+
+ xen_shared_info = slab_is_available()
+ ? (void *)get_zeroed_page(GFP_KERNEL)
+ : alloc_bootmem_pages(PAGE_SIZE);
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = __pa(xen_shared_info) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+}
+
+static void __ref prepare_shared_info_page(void)
+{
+ _prepare_shared_info_page();
+}
+#endif
+
+static bool __cpuinit xen_platform(void)
+{
+ unsigned int first = XEN_CPUID_FIRST_LEAF;
+
+#if 0 /* So far, Xen sets this only for PV guests. */
+ if (!cpu_has_hypervisor)
+ return false;
+#endif
+
+ while (first < XEN_CPUID_LEAF(0x10000)) {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(first, &eax, &ebx, &ecx, &edx);
+ if (ebx == XEN_CPUID_SIGNATURE_EBX
+ && ecx == XEN_CPUID_SIGNATURE_ECX
+ && edx == XEN_CPUID_SIGNATURE_EDX) {
+ if (!smp_processor_id()) {
+ cpuid(first + 1, &eax, &ebx, &ecx, &edx);
+ printk(KERN_INFO "Running on Xen %u.%u\n",
+ eax >> 16, eax & 0xffff);
+ }
+ return true;
+ }
+ first += 0x100;
+ }
+
+ return false;
+}
+
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+ unsigned int msr, eax, ebx, ecx, edx;
+ unsigned int first = XEN_CPUID_FIRST_LEAF;
+ int ret;
+ struct vcpu_register_runstate_memory_area vrrma;
+
+ if (num_possible_cpus() <= 1
+ || !spin_count
+ || (c != &boot_cpu_data
+ && !boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD)))
+ return;
+
+ while (first < XEN_CPUID_LEAF(0x10000)) {
+ cpuid(first, &eax, &ebx, &ecx, &edx);
+ if (ebx == XEN_CPUID_SIGNATURE_EBX
+ && ecx == XEN_CPUID_SIGNATURE_ECX
+ && edx == XEN_CPUID_SIGNATURE_EDX)
+ break;
+ first += 0x100;
+ }
+ BUG_ON(first >= XEN_CPUID_LEAF(0x10000));
+
+ cpuid(first + 2, &eax, &msr, &ecx, &edx);
+ BUG_ON(!eax);
+ wrmsrl(msr, __pa_symbol(hypercall_page));
+
+ if (!xen_shared_info)
+ prepare_shared_info_page();
+
+ memset(&vrrma, 0, sizeof(vrrma));
+ set_xen_guest_handle(vrrma.addr.h, &__get_cpu_var(runstate));
+ ret = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
+ c->cpu_index, &vrrma);
+ if (ret) {
+ printk(KERN_WARNING
+ "Could not register runstate area for CPU%u: %d\n",
+ c->cpu_index, ret);
+ BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD));
+ return;
+ }
+
+ if (c != &boot_cpu_data || !percpu_read(poll_evtchn)) {
+ struct evtchn_bind_ipi bind_ipi;
+
+ bind_ipi.vcpu = c->cpu_index;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi);
+ if (ret) {
+ printk(KERN_WARNING
+ "Could not bind event channel for CPU%u: %d\n",
+ c->cpu_index, ret);
+ BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD));
+ return;
+ }
+ sync_set_bit(bind_ipi.port, xen_shared_info->evtchn_mask);
+ percpu_write(poll_evtchn, bind_ipi.port);
+ printk(KERN_INFO "CPU%u spinlock poll event channel: %u\n",
+ c->cpu_index, bind_ipi.port);
+ }
+
+ virt_spin_lock = xen_spin_lock;
+ virt_spin_unlock = xen_spin_unlock;
+ set_cpu_cap(c, X86_FEATURE_SPINLOCK_YIELD);
+#endif
+}
+
+const __refconst struct hypervisor_x86 x86_hyper_xen = {
+ .name = "Xen",
+ .detect = xen_platform,
+ .set_cpu_features = xen_set_cpu_features
+};


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/