From: Stephane Eranian on
On Wed, Mar 3, 2010 at 5:39 PM, Peter Zijlstra <a.p.zijlstra(a)chello.nl> wrote:
> Implement support for Intel LBR stacks that support
> FREEZE_LBRS_ON_PMI. We do not (yet?) support the LBR config register
> because that is SMT wide and would also put undue restraints on the
> PEBS users.
>
You're saying PEBS users have priority over pure LBR users?
Why is that?

Without coding this, how would you expose the LBR configuration to userland,
given that you're using the PERF_SAMPLE_BRANCH_STACK approach?


> Signed-off-by: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
> ---
>  arch/x86/kernel/cpu/perf_event.c           |   22 ++
>  arch/x86/kernel/cpu/perf_event_intel.c     |   13 +
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c |  228 +++++++++++++++++++++++++++++
>  3 files changed, 263 insertions(+)
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -48,6 +48,12 @@ struct amd_nb {
>        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
>  };
>
> +#define MAX_LBR_ENTRIES                16
> +
> +struct lbr_entry {
> +       u64     from, to, flags;
> +};
> +
>  struct cpu_hw_events {
>        /*
>         * Generic x86 PMC bits
> @@ -70,6 +76,14 @@ struct cpu_hw_events {
>        u64                     pebs_enabled;
>
>        /*
> +        * Intel LBR bits
> +        */
> +       int                     lbr_users;
> +       int                     lbr_entries;
> +       struct lbr_entry        lbr_stack[MAX_LBR_ENTRIES];
> +       void                    *lbr_context;
> +
> +       /*
>         * AMD specific bits
>         */
>        struct amd_nb           *amd_nb;
> @@ -154,6 +168,13 @@ struct x86_pmu {
>        int             pebs_record_size;
>        void            (*drain_pebs)(void);
>        struct event_constraint *pebs_constraints;
> +
> +       /*
> +        * Intel LBR
> +        */
> +       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
> +       int             lbr_nr;                    /* hardware stack size */
> +       int             lbr_format;                /* hardware format     */
>  };
>
>  static struct x86_pmu x86_pmu __read_mostly;
> @@ -1238,6 +1259,7 @@ undo:
>
>  #include "perf_event_amd.c"
>  #include "perf_event_p6.c"
> +#include "perf_event_intel_lbr.c"
>  #include "perf_event_intel_ds.c"
>  #include "perf_event_intel.c"
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
>                intel_pmu_disable_bts();
>
>        intel_pmu_pebs_disable_all();
> +       intel_pmu_lbr_disable_all();
>  }
>
>  static void intel_pmu_enable_all(void)
> @@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
>        }
>
>        intel_pmu_pebs_enable_all();
> +       intel_pmu_lbr_enable_all();
>  }
>
>  static inline u64 intel_pmu_get_status(void)
> @@ -675,6 +677,8 @@ again:
>        inc_irq_stat(apic_perf_irqs);
>        ack = status;
>
> +       intel_pmu_lbr_read();
> +
>        /*
>         * PEBS overflow sets bit 62 in the global status register
>         */
> @@ -847,6 +851,8 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_core();
> +
>                x86_pmu.event_constraints = intel_core2_event_constraints;
>                pr_cont("Core2 events, ");
>                break;
> @@ -856,13 +862,18 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_nhm();
> +
>                x86_pmu.event_constraints = intel_nehalem_event_constraints;
>                pr_cont("Nehalem/Corei7 events, ");
>                break;
> +
>        case 28: /* Atom */
>                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_atom();
> +
>                x86_pmu.event_constraints = intel_gen_event_constraints;
>                pr_cont("Atom events, ");
>                break;
> @@ -872,6 +883,8 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_nhm();
> +
>                x86_pmu.event_constraints = intel_westmere_event_constraints;
>                pr_cont("Westmere events, ");
>                break;
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -0,0 +1,228 @@
> +#ifdef CONFIG_CPU_SUP_INTEL
> +
> +enum {
> +       LBR_FORMAT_32           = 0x00,
> +       LBR_FORMAT_LIP          = 0x01,
> +       LBR_FORMAT_EIP          = 0x02,
> +       LBR_FORMAT_EIP_FLAGS    = 0x03,
> +};
> +
> +/*
> + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
> + * otherwise it becomes near impossible to get a reliable stack.
> + */
> +
> +#define X86_DEBUGCTL_LBR                               (1 << 0)
> +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI                (1 << 11)
> +
> +static void __intel_pmu_lbr_enable(void)
> +{
> +       u64 debugctl;
> +
> +       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +       debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
> +       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void __intel_pmu_lbr_disable(void)
> +{
> +       u64 debugctl;
> +
> +       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +       debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
> +       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void intel_pmu_lbr_reset_32(void)
> +{
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++)
> +               wrmsrl(x86_pmu.lbr_from + i, 0);
> +}
> +
> +static void intel_pmu_lbr_reset_64(void)
> +{
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++) {
> +               wrmsrl(x86_pmu.lbr_from + i, 0);
> +               wrmsrl(x86_pmu.lbr_to   + i, 0);
> +       }
> +}
> +
> +static void intel_pmu_lbr_reset(void)
> +{
> +       if (x86_pmu.lbr_format == LBR_FORMAT_32)
> +               intel_pmu_lbr_reset_32();
> +       else
> +               intel_pmu_lbr_reset_64();
> +}
> +
> +static void intel_pmu_lbr_enable(struct perf_event *event)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!x86_pmu.lbr_nr)
> +               return;
> +
> +       WARN_ON(cpuc->enabled);
> +
> +       /*
> +        * Reset the LBR stack if this is the first LBR user or
> +        * we changed task context so as to avoid data leaks.
> +        */
> +
> +       if (!cpuc->lbr_users ||
> +           (event->ctx->task && cpuc->lbr_context != event->ctx)) {
> +               intel_pmu_lbr_reset();
> +               cpuc->lbr_context = event->ctx;
> +       }
> +
> +       cpuc->lbr_users++;
> +}
> +
> +static void intel_pmu_lbr_disable(struct perf_event *event)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!x86_pmu.lbr_nr)
> +               return;
> +
> +       cpuc->lbr_users--;
> +
> +       BUG_ON(cpuc->lbr_users < 0);
> +       WARN_ON(cpuc->enabled);
> +}
> +
> +static void intel_pmu_lbr_enable_all(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (cpuc->lbr_users)
> +               __intel_pmu_lbr_enable();
> +}
> +
> +static void intel_pmu_lbr_disable_all(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (cpuc->lbr_users)
> +               __intel_pmu_lbr_disable();
> +}
> +
> +static inline u64 intel_pmu_lbr_tos(void)
> +{
> +       u64 tos;
> +
> +       rdmsrl(x86_pmu.lbr_tos, tos);
> +
> +       return tos;
> +}
> +
> +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
> +{
> +       unsigned long mask = x86_pmu.lbr_nr - 1;
> +       u64 tos = intel_pmu_lbr_tos();
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
> +               unsigned long lbr_idx = (tos - i) & mask;
> +               union {
> +                       struct {
> +                               u32 from;
> +                               u32 to;
> +                       };
> +                       u64     lbr;
> +               } msr_lastbranch;
> +
> +               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
> +
> +               cpuc->lbr_stack[i].from  = msr_lastbranch.from;
> +               cpuc->lbr_stack[i].to    = msr_lastbranch.to;
> +               cpuc->lbr_stack[i].flags = 0;
> +       }
> +       cpuc->lbr_entries = i;
> +}
> +
> +#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
> +
> +/*
> + * Due to lack of segmentation in Linux the effective address (offset)
> + * is the same as the linear address, allowing us to merge the LIP and EIP
> + * LBR formats.
> + */
> +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
> +{
> +       unsigned long mask = x86_pmu.lbr_nr - 1;
> +       u64 tos = intel_pmu_lbr_tos();
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
> +               unsigned long lbr_idx = (tos - i) & mask;
> +               u64 from, to, flags = 0;
> +
> +               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
> +               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
> +
> +               if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
> +                       flags = !!(from & LBR_FROM_FLAG_MISPRED);
> +                       from = (u64)((((s64)from) << 1) >> 1);
> +               }
> +
> +               cpuc->lbr_stack[i].from  = from;
> +               cpuc->lbr_stack[i].to    = to;
> +               cpuc->lbr_stack[i].flags = flags;
> +       }
> +       cpuc->lbr_entries = i;
> +}
> +
> +static void intel_pmu_lbr_read(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!cpuc->lbr_users)
> +               return;
> +
> +       if (x86_pmu.lbr_format == LBR_FORMAT_32)
> +               intel_pmu_lbr_read_32(cpuc);
> +       else
> +               intel_pmu_lbr_read_64(cpuc);
> +}
> +
> +static int intel_pmu_lbr_format(void)
> +{
> +       u64 capabilities;
> +
> +       rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
> +       return capabilities & 0x1f;
> +}
> +
> +static void intel_pmu_lbr_init_core(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 4;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x40;
> +       x86_pmu.lbr_to     = 0x60;
> +}
> +
> +static void intel_pmu_lbr_init_nhm(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 16;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x680;
> +       x86_pmu.lbr_to     = 0x6c0;
> +}
> +
> +static void intel_pmu_lbr_init_atom(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 8;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x40;
> +       x86_pmu.lbr_to     = 0x60;
> +}
> +
> +#endif /* CONFIG_CPU_SUP_INTEL */
>
> --
>
>



From: Stephane Eranian on
I don't understand how LBR state is migrated when a per-thread event is moved
from one CPU to another. It seems LBR is managed per-cpu.

Can you explain this to me?


On Wed, Mar 3, 2010 at 5:39 PM, Peter Zijlstra <a.p.zijlstra(a)chello.nl> wrote:
> Implement support for Intel LBR stacks that support
> FREEZE_LBRS_ON_PMI. We do not (yet?) support the LBR config register
> because that is SMT wide and would also put undue restraints on the
> PEBS users.



From: Peter Zijlstra on
On Wed, 2010-03-03 at 22:57 +0100, Stephane Eranian wrote:
> I don't understand how LBR state is migrated when a per-thread event is moved
> from one CPU to another. It seems LBR is managed per-cpu.
>
> Can you explain this to me?

It is not; it's basically impossible to do given that the TOS doesn't
count more bits than are strictly needed.

Or we should stop supporting CPU and task users at the same time.
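
For illustration only (none of this is in the patch), a naive per-task save
on context-switch-out could reuse the helpers the patch adds, roughly like
the sketch below (assuming one of the 64-bit formats). The restore side is
where it falls apart: the wrapping TOS gives no way to tell how many of the
saved entries were overwritten while the task was scheduled out.

struct lbr_saved_state {
        u64                     tos;
        struct lbr_entry        entries[MAX_LBR_ENTRIES];
};

/*
 * Illustrative sketch, not part of the patch: snapshot the LBR stack
 * of the current CPU.  Because the hardware TOS wraps at lbr_nr,
 * comparing st->tos with the TOS read at switch-in cannot distinguish
 * "no new branches" from "the whole stack was rewritten", so there is
 * no reliable restore to pair this with.
 */
static void lbr_save(struct lbr_saved_state *st)
{
        int i;

        st->tos = intel_pmu_lbr_tos();
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                rdmsrl(x86_pmu.lbr_from + i, st->entries[i].from);
                rdmsrl(x86_pmu.lbr_to   + i, st->entries[i].to);
        }
}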

From: Peter Zijlstra on
On Wed, 2010-03-03 at 22:52 +0100, Stephane Eranian wrote:
> On Wed, Mar 3, 2010 at 5:39 PM, Peter Zijlstra <a.p.zijlstra(a)chello.nl> wrote:
> > Implement support for Intel LBR stacks that support
> > FREEZE_LBRS_ON_PMI. We do not (yet?) support the LBR config register
> > because that is SMT wide and would also put undue restraints on the
> > PEBS users.
> >
> You're saying PEBS users have priorities over pure LBR users?
> Why is that?

I say no such thing; I only say it would make scheduling the PEBS events
more interesting.

> Without coding this, how would you expose LBR configuration to userland
> given you're using the PERF_SAMPLE_BRANCH_STACK approach?

Possibly by using a second config word in the attr, but given how sucky the
hardware currently is (the config is shared between SMT siblings) I'd be
inclined to pretend it doesn't exist for the moment.
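
For concreteness, a minimal userland sketch of what that could look like;
attr.config1 and the LBR_SEL_* values below are purely hypothetical
illustrations of such a "second config word", and exist neither in this
patch nor in the ABI as posted.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

#define LBR_SEL_USER            (1ULL << 0)     /* hypothetical filter bit */
#define LBR_SEL_NEAR_RET        (1ULL << 1)     /* hypothetical filter bit */

static int open_lbr_event(void)
{
        struct perf_event_attr attr = {
                .type           = PERF_TYPE_HARDWARE,
                .size           = sizeof(attr),
                .config         = PERF_COUNT_HW_CPU_CYCLES,
                .sample_period  = 100000,
                .sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
                .config1        = LBR_SEL_USER | LBR_SEL_NEAR_RET,
        };

        /* raw syscall; there is no glibc wrapper for perf_event_open */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}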

From: Stephane Eranian on
On Thu, Mar 4, 2010 at 12:58 AM, Peter Zijlstra <peterz(a)infradead.org> wrote:
> On Wed, 2010-03-03 at 22:57 +0100, Stephane Eranian wrote:
>> I don't understand how LBR state is migrated when a per-thread event is moved
>> from one CPU to another. It seems LBR is managed per-cpu.
>>
>> Can you explain this to me?
>
> It is not, its basically impossible to do given that the TOS doesn't
> count more bits than is strictly needed.
>
I don't get that about the TOS.

So you are saying that on context switch out, you drop the current
contents of the LBR. When you are scheduled back in on another CPU,
you grab whatever is there?

> Or we should stop supporting cpu and task users at the same time.
>
Or you should consider the LBR as an event which has a constraint that
it can only run on one pseudo-counter (similar to what you do with
BTS). Scheduling would take care of the mutual exclusion, and multiplexing
would provide the workaround.
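
A rough sketch of that idea, modelled on how the BTS pseudo counter is
handled today; X86_PMC_IDX_FIXED_LBR and the constraint helper below are
hypothetical names for illustration, not something this patch defines.

/*
 * Hypothetical: reserve a pseudo counter index for the LBR (the way
 * X86_PMC_IDX_FIXED_BTS does for BTS) and constrain LBR-using events
 * to it, so the generic scheduler enforces mutual exclusion and event
 * rotation provides the multiplexing.
 */
#define X86_PMC_IDX_FIXED_LBR   (X86_PMC_IDX_FIXED + 17)

static struct event_constraint lbr_event_constraint =
        EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_LBR, 0);

static struct event_constraint *
intel_lbr_constraints(struct perf_event *event)
{
        /* assume PERF_SAMPLE_BRANCH_STACK marks an LBR-using event */
        if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
                return &lbr_event_constraint;

        return NULL;
}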