From: Peter Zijlstra on
On Mon, 2010-02-01 at 14:50 +0200, Stephane Eranian wrote:
> We cannot assume that because hwc->idx == assign[i], we
> can avoid reprogramming the counter in hw_perf_enable().
>
> The event may have been scheduled out and another event
> may have been programmed into this counter. Thus, we need
> a more robust way of verifying if the counter still
> contains config/data related to an event.
>
> This patch adds a generation number to each counter on each
> cpu. Using this mechanism we can reliably verify whether the
> content of a counter corresponds to an event.
>
> Signed-off-by: Stephane Eranian <eranian(a)google.com>

Thanks, got it.

btw, I've also added the below. From what I can make of the docs, fixed
counter 2 is identical to arch perf event 0x013c, as per tables A-1 and
A-7. Both are called CPU_CLK_UNHALTED.REF, except on Core2, where
0x013c is called CPU_CLK_UNHALTED.BUS.

---
Subject: perf_events, x86: Fixup fixed counter constraints
From: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
Date: Mon Feb 01 15:36:30 CET 2010

Patch 1da53e0230 ("perf_events, x86: Improve x86 event scheduling")
lost us one of the fixed purpose counters and then ed8777fc13
("perf_events, x86: Fix event constraint masks") broke it even
further.

Widen the fixed event mask to event+umask and specify the full config
for each of the 3 fixed purpose counters. Then let the init code fill
out the placement for the GP regs based on the cpuid info.

Signed-off-by: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
LKML-Reference: <new-submission>
---
arch/x86/include/asm/perf_event.h | 2 +-
arch/x86/kernel/cpu/perf_event.c | 38 ++++++++++++++++++++++++++++++--------
2 files changed, 31 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/include/asm/perf_event.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/perf_event.h
+++ linux-2.6/arch/x86/include/asm/perf_event.h
@@ -50,7 +50,7 @@
INTEL_ARCH_INV_MASK| \
INTEL_ARCH_EDGE_MASK|\
INTEL_ARCH_UNIT_MASK|\
- INTEL_ARCH_EVTSEL_MASK)
+ INTEL_ARCH_EVENT_MASK)

#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -243,8 +243,18 @@ static struct event_constraint intel_cor

static struct event_constraint intel_core2_event_constraints[] =
{
- FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
- FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
+ /*
+ * FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), CPU_CLK_UNHALTED.REF
+ *
+ * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
+ * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
+ * ratio between these counters.
+ *
+ * TODO: find/measure the fixed ratio and apply it so that we can
+ * enable this fixed purpose counter in a transparent way.
+ */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -259,8 +269,9 @@ static struct event_constraint intel_cor

static struct event_constraint intel_nehalem_event_constraints[] =
{
- FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
- FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -274,8 +285,9 @@ static struct event_constraint intel_neh

static struct event_constraint intel_westmere_event_constraints[] =
{
- FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
- FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -284,8 +296,9 @@ static struct event_constraint intel_wes

static struct event_constraint intel_gen_event_constraints[] =
{
- FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
- FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};

@@ -2602,6 +2615,7 @@ static void __init pmu_check_apic(void)

void __init init_hw_perf_events(void)
{
+ struct event_constraint *c;
int err;

pr_info("Performance Events: ");
@@ -2650,6 +2664,14 @@ void __init init_hw_perf_events(void)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
0, x86_pmu.num_events);

+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if (c->cmask != INTEL_ARCH_FIXED_MASK)
+ continue;
+
+ c->idxmsk64[0] |= (1ULL << x86_pmu.num_events) - 1;
+ c->weight += x86_pmu.num_events;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.event_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_events);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on
On Mon, 2010-02-01 at 16:35 +0100, Peter Zijlstra wrote:
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -243,8 +243,18 @@ static struct event_constraint intel_cor
>
> static struct event_constraint intel_core2_event_constraints[] =
> {
> - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
> - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
> + FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
> + FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
> + /*
> + * FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), CPU_CLK_UNHALTED.REF
> + *
> + * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
> + * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
> + * ratio between these counters.
> + *
> + * TODO: find/measure the fixed ratio and apply it so that we can
> + * enable this fixed purpose counter in a transparent way.
> + */
> INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
> INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
> INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */

From what I can measure on the available Core2 systems, this ratio is
exactly 1, which would be consistent with the Nehalem and Westmere
tables calling this event .REF.

Stephane, have you ever observed this ratio to be anything other than 1?

If not, I think we can simply stick this counter back in and not worry
about it.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Stephane Eranian on
On Mon, Feb 1, 2010 at 4:35 PM, Peter Zijlstra <peterz(a)infradead.org> wrote:
> On Mon, 2010-02-01 at 14:50 +0200, Stephane Eranian wrote:
>>       We cannot assume that because hwc->idx == assign[i], we
>>       can avoid reprogramming the counter in hw_perf_enable().
>>
>>       The event may have been scheduled out and another event
>>       may have been programmed into this counter. Thus, we need
>>       a more robust way of verifying if the counter still
>>       contains config/data related to an event.
>>
>>       This patch adds a generation number to each counter on each
>>       cpu. Using this mechanism we can reliably verify whether the
>>       content of a counter corresponds to an event.
>>
>>       Signed-off-by: Stephane Eranian <eranian(a)google.com>
>
> Thanks, got it.
>
> btw, I've also added the below, from what I can make from the docs fixed
> counter 2 is identical to arch perf event 0x013c, as per table A-1 and
> A-7. Both are called CPU_CLK_UNHALTED.REF, except for Core2, where
> 0x013c is called CPU_CLK_UNHALTED.BUS.
>

If you measure 0x013c in a generic counter or in fixed counter 2
it will count the same thing but not at the same rate.
This is true on Core2, Atom, Nehalem, Westmere. The ratio is the
clock/bus ratio.

This goes back to an earlier discussion where I was asking about
the meaning of the generic PMU events, and in particular
PERF_COUNT_HW_CPU_CYCLES. Which of the 3 distinct cycle
events (unhalted_core_cycles, unhalted_reference_cycles, bus_cycles)
does it correspond to?



> ---
> Subject: perf_events, x86: Fixup fixed counter constraints
> From: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
> Date: Mon Feb 01 15:36:30 CET 2010
>
> Patch 1da53e0230 ("perf_events, x86: Improve x86 event scheduling")
> lost us one of the fixed purpose counters and then ed8777fc13
> ("perf_events, x86: Fix event constraint masks") broke it even
> further.
>
> Widen the fixed event mask to event+umask and specify the full config
> for each of the 3 fixed purpose counters. Then let the init code fill
> out the placement for the GP regs based on the cpuid info.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
> LKML-Reference: <new-submission>
> ---
>  arch/x86/include/asm/perf_event.h |    2 +-
>  arch/x86/kernel/cpu/perf_event.c  |   38 ++++++++++++++++++++++++++++++--------
>  2 files changed, 31 insertions(+), 9 deletions(-)
>
> Index: linux-2.6/arch/x86/include/asm/perf_event.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/perf_event.h
> +++ linux-2.6/arch/x86/include/asm/perf_event.h
> @@ -50,7 +50,7 @@
>         INTEL_ARCH_INV_MASK| \
>         INTEL_ARCH_EDGE_MASK|\
>         INTEL_ARCH_UNIT_MASK|\
> -        INTEL_ARCH_EVTSEL_MASK)
> +        INTEL_ARCH_EVENT_MASK)
>
>  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL                0x3c
>  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK                (0x00 << 8)
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -243,8 +243,18 @@ static struct event_constraint intel_cor
>
>  static struct event_constraint intel_core2_event_constraints[] =
>  {
> -       FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
> -       FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
> +       FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
> +       FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
> +       /*
> +        * FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34),  CPU_CLK_UNHALTED.REF
> +        *
> +        * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
> +        * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
> +        * ratio between these counters.
> +        *
> +        * TODO: find/measure the fixed ratio and apply it so that we can
> +        * enable this fixed purpose counter in a transparent way.
> +        */
>        INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
>        INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
>        INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
> @@ -259,8 +269,9 @@ static struct event_constraint intel_cor
>
>  static struct event_constraint intel_nehalem_event_constraints[] =
>  {
> -       FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
> -       FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
> +       FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
> +       FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
> +       FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
>        INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
>        INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
>        INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
> @@ -274,8 +285,9 @@ static struct event_constraint intel_neh
>
>  static struct event_constraint intel_westmere_event_constraints[] =
>  {
> -       FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
> -       FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
> +       FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
> +       FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
> +       FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
>        INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
>        INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
>        INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
> @@ -284,8 +296,9 @@ static struct event_constraint intel_wes
>
>  static struct event_constraint intel_gen_event_constraints[] =
>  {
> -       FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
> -       FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
> +       FIXED_EVENT_CONSTRAINT(0x00c0, 1ULL << 32), /* INST_RETIRED.ANY */
> +       FIXED_EVENT_CONSTRAINT(0x003c, 1ULL << 33), /* CPU_CLK_UNHALTED.CORE */
> +       FIXED_EVENT_CONSTRAINT(0x013c, 1ULL << 34), /* CPU_CLK_UNHALTED.REF */
>        EVENT_CONSTRAINT_END
>  };
>
> @@ -2602,6 +2615,7 @@ static void __init pmu_check_apic(void)
>
>  void __init init_hw_perf_events(void)
>  {
> +       struct event_constraint *c;
>        int err;
>
>        pr_info("Performance Events: ");
> @@ -2650,6 +2664,14 @@ void __init init_hw_perf_events(void)
>                __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
>                                   0, x86_pmu.num_events);
>
> +       for_each_event_constraint(c, x86_pmu.event_constraints) {
> +               if (c->cmask != INTEL_ARCH_FIXED_MASK)
> +                       continue;
> +
> +               c->idxmsk64[0] |= (1ULL << x86_pmu.num_events) - 1;
> +               c->weight += x86_pmu.num_events;
> +       }
> +
>        pr_info("... version:                %d\n",     x86_pmu.version);
>        pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
>        pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
>
>
>



--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00
This email may be confidential or privileged. If you received this
communication by mistake, please
don't forward it to anyone else, please erase all copies and
attachments, and please let me know that
it went to the wrong person. Thanks
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on
On Mon, 2010-02-01 at 17:14 +0100, Stephane Eranian wrote:

> Using perfmon on Core 2 on a 10s noploop:
>
> pfmon -eunhalted_reference_cycles,unhalted_core_cycles,cpu_clk_unhalted:bus
> noploop 10
> noploop for 10 seconds
> 23869090125 UNHALTED_REFERENCE_CYCLES
> 23849336873 UNHALTED_CORE_CYCLES
> 2652122099 CPU_CLK_UNHALTED:BUS

Weird, I used:

while :; do :; done &
while :; do :; done &
while :; do :; done &
while :; do :; done &
perf stat -a -e r013c -e r013c sleep 4
killall bash

Which gives:

Performance counter stats for 'sleep 4':

244235699509090 raw 0x13c
244235695558036 raw 0x13c

4.005485333 seconds time elapsed

I verified, using sysrq-p, that it used fixed counter 2 and general
purpose counter 0.

[523417.108402] CPU#0: gen-PMC0 ctrl: 000000000053013c
[523417.108403] CPU#0: gen-PMC0 count: 000000ff80019948
[523417.108405] CPU#0: gen-PMC0 left: 000000007fffffff
[523417.108407] CPU#0: gen-PMC1 ctrl: 0000000000000000
[523417.108409] CPU#0: gen-PMC1 count: 0000000000000000
[523417.108411] CPU#0: gen-PMC1 left: 000000007fffb8a8
[523417.108412] CPU#0: fixed-PMC0 count: 0000000000000000
[523417.108414] CPU#0: fixed-PMC1 count: 0000000000000000
[523417.108416] CPU#0: fixed-PMC2 count: 0000010db1db2117

This is using -linus, since that doesn't have any of the recent constraint
patches in it that would prevent us from using fixed-PMC2.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on
On Mon, 2010-02-01 at 17:12 +0100, Stephane Eranian wrote:
> > btw, I've also added the below, from what I can make from the docs fixed
> > counter 2 is identical to arch perf event 0x013c, as per table A-1 and
> > A-7. Both are called CPU_CLK_UNHALTED.REF, except for Core2, where
> > 0x013c is called CPU_CLK_UNHALTED.BUS.
> >
>
> If you measure 0x013c in a generic counter or in fixed counter 2
> it will count the same thing but not at the same rate.
> This is true on Core2, Atom, Nehalem, Westmere. The ratio is the
> clock/bus ratio.

But for Nehalem and Westmere, event 0x3c umask 0x01 is referred to as
CPU_CLK_UNHALTED.REF_P (Tables A-2 and A-4).

Fixed Counter 2 is referred to as CPU_CLK_UNHALTED.REF (Table A-7).

For Core2 and Atom (Tables A-8 and A-9) it is called CPU_CLK_UNHALTED.BUS;
for these entries there is talk about a fixed ratio.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/