From: Greg KH on
2.6.27-stable review patch. If anyone has any objections, please let us know.

------------------

From: Jeremy Fitzhardinge <jeremy.fitzhardinge(a)citrix.com>

commit 8a22b9996b001c88f2bfb54c6de6a05fc39e177a upstream.

xen_sched_clock only counts unstolen time. In principle this should
be useful to the Linux scheduler so that it knows how much time a process
actually consumed. But in practice this doesn't work very well as the
scheduler expects the sched_clock time to be synchronized between
cpus. It also uses sched_clock to measure the time a task spends
sleeping, in which case "unstolen time" isn't meaningful.

So just use plain xen_clocksource_read to return wallclock nanoseconds
for sched_clock.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge(a)citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)suse.de>

---
arch/x86/xen/enlighten.c | 2 +-
arch/x86/xen/time.c | 43 +------------------------------------------
arch/x86/xen/xen-ops.h | 3 ++-
3 files changed, 4 insertions(+), 44 deletions(-)

--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1179,7 +1179,7 @@ static const struct pv_time_ops xen_time
.set_wallclock = xen_set_wallclock,
.get_wallclock = xen_get_wallclock,
.get_tsc_khz = xen_tsc_khz,
- .sched_clock = xen_sched_clock,
+ .sched_clock = xen_clocksource_read,
};

static const struct pv_cpu_ops xen_cpu_ops __initdata = {
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -30,8 +30,6 @@
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)

-static cycle_t xen_clocksource_read(void);
-
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

@@ -158,45 +156,6 @@ static void do_stolen_accounting(void)
account_steal_time(idle_task(smp_processor_id()), ticks);
}

-/*
- * Xen sched_clock implementation. Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-unsigned long long xen_sched_clock(void)
-{
- struct vcpu_runstate_info state;
- cycle_t now;
- u64 ret;
- s64 offset;
-
- /*
- * Ideally sched_clock should be called on a per-cpu basis
- * anyway, so preempt should already be disabled, but that's
- * not current practice at the moment.
- */
- preempt_disable();
-
- now = xen_clocksource_read();
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- offset = now - state.state_entry_time;
- if (offset < 0)
- offset = 0;
-
- ret = state.time[RUNSTATE_blocked] +
- state.time[RUNSTATE_running] +
- offset;
-
- preempt_enable();
-
- return ret;
-}
-
-
/* Get the TSC speed from Xen */
unsigned long xen_tsc_khz(void)
{
@@ -213,7 +172,7 @@ unsigned long xen_tsc_khz(void)
return xen_khz;
}

-static cycle_t xen_clocksource_read(void)
+cycle_t xen_clocksource_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -3,6 +3,7 @@

#include <linux/init.h>
#include <linux/irqreturn.h>
+#include <linux/clocksource.h>
#include <xen/xen-ops.h>

/* These are code, but not functions. Defined in entry.S */
@@ -37,7 +38,7 @@ unsigned long xen_tsc_khz(void);
void __init xen_time_init(void);
unsigned long xen_get_wallclock(void);
int xen_set_wallclock(unsigned long time);
-unsigned long long xen_sched_clock(void);
+cycle_t xen_clocksource_read(void);

irqreturn_t xen_debug_interrupt(int irq, void *dev_id);



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/