From: Eric Dumazet on
Arjan van de Ven a �crit :
> On Tue, 29 Sep 2009 14:56:28 -0700 (PDT)
> Linus Torvalds <torvalds(a)linux-foundation.org> wrote:
>
>>
>> On Tue, 29 Sep 2009, Arjan van de Ven wrote:
>>> can't we use alternatives() for this, to patch cmpxchg64 in ?
>>> I mean.. it'll be commonly supported nowadays.. the fallback to it
>>> not being supported could be a bit slower by now...
>> Yes, we could. It would limit us to some fixed address format,
>> probably
>>
>> cmpxchg8b (%esi)
>>
>> or something. Use something like this as a starting point, perhaps?
>>
>> NOTE! Totally untested! And you'd actually need to implement the
>> "cmpxchg8b_emu" function that takes it's arguments in %eax:%edx,
>> %ebx:%ecx and %esi and doesn't trash any other registers..
>
> so I debugged this guy (had a few bugs ;-)
>
> patch, including a new cmpxchg8b_emu below:
>
> From 5a76986c5dd272ea16a9b8abb7349ff3d6791c2b Mon Sep 17 00:00:00 2001
> From: Arjan van de Ven <arjan(a)linux.intel.com>
> Date: Wed, 30 Sep 2009 17:04:35 +0200
> Subject: [PATCH] x86: Provide an alternative() based cmpxchg64()
>
> Based on Linus' patch, this patch provides cmpxchg64() using
> the alternative() infrastructure.
>
> Note: the fallback is NOT smp safe, just like the current fallback
> is not SMP safe.
>
> Signed-off-by: Arjan van de Ven <arjan(a)linux.intel.com>
> ---
> arch/x86/include/asm/cmpxchg_32.h | 29 ++++++++++--------
> arch/x86/kernel/i386_ksyms_32.c | 3 ++
> arch/x86/lib/Makefile | 2 +-
> arch/x86/lib/cmpxchg8b_emu.S | 61 +++++++++++++++++++++++++++++++++++++
> 4 files changed, 81 insertions(+), 14 deletions(-)
> create mode 100644 arch/x86/lib/cmpxchg8b_emu.S
>
> diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
> index 82ceb78..3b21afa 100644
> --- a/arch/x86/include/asm/cmpxchg_32.h
> +++ b/arch/x86/include/asm/cmpxchg_32.h
> @@ -312,19 +312,22 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
>
> extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
>
> -#define cmpxchg64(ptr, o, n) \
> -({ \
> - __typeof__(*(ptr)) __ret; \
> - if (likely(boot_cpu_data.x86 > 4)) \
> - __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \
> - (unsigned long long)(o), \
> - (unsigned long long)(n)); \
> - else \
> - __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \
> - (unsigned long long)(o), \
> - (unsigned long long)(n)); \
> - __ret; \
> -})
> +#define cmpxchg64(ptr, o, n) \
> +({ \
> + __typeof__(*(ptr)) __ret; \
> + __typeof__(*(ptr)) __old = (o); \
> + __typeof__(*(ptr)) __new = (n); \
> + alternative_io("call cmpxchg8b_emu", \
> + "lock; cmpxchg8b (%%esi)" , \
> + X86_FEATURE_CX8, \
> + "=A" (__ret), \
> + "S" ((ptr)), "0" (__old), \
> + "b" ((unsigned int)__new), \
> + "c" ((unsigned int)(__new>>32))); \
> + __ret; })
> +
> +
> +
> #define cmpxchg64_local(ptr, o, n) \
> ({ \
> __typeof__(*(ptr)) __ret; \
> diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
> index 43cec6b..f17930e 100644
> --- a/arch/x86/kernel/i386_ksyms_32.c
> +++ b/arch/x86/kernel/i386_ksyms_32.c
> @@ -10,6 +10,9 @@
> EXPORT_SYMBOL(mcount);
> #endif
>
> +extern void cmpxchg8b_emu(void); /* dummy proto */
> +EXPORT_SYMBOL(cmpxchg8b_emu);
> +
> /* Networking helper routines. */
> EXPORT_SYMBOL(csum_partial_copy_generic);
>
> diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
> index 9e60920..3e549b8 100644
> --- a/arch/x86/lib/Makefile
> +++ b/arch/x86/lib/Makefile
> @@ -15,7 +15,7 @@ ifeq ($(CONFIG_X86_32),y)
> obj-y += atomic64_32.o
> lib-y += checksum_32.o
> lib-y += strstr_32.o
> - lib-y += semaphore_32.o string_32.o
> + lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o
>
> lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
> else
> diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
> new file mode 100644
> index 0000000..b8af4c7
> --- /dev/null
> +++ b/arch/x86/lib/cmpxchg8b_emu.S
> @@ -0,0 +1,61 @@
> +/*
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; version 2
> + * of the License.
> + *
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/alternative-asm.h>
> +#include <asm/frame.h>
> +#include <asm/dwarf2.h>
> +
> +
> +.text
> +
> +/*
> + * Inputs:
> + * %esi : memory location to compare
> + * %eax : low 32 bits of old value
> + * %edx : high 32 bits of old value
> + * %ebx : low 32 bits of new value
> + * %ecx : high 32 bits of new value
> + */
> +ENTRY(cmpxchg8b_emu)
> + CFI_STARTPROC
> +
> + push %edi
> + push %ebx
> + push %ecx
> + /* disable interrupts */
> + pushf

> + pop %edi
Why do you pop flags in edi, to later re-push them ?

> + cli
> +
> + cmpl %edx, 4(%esi)
> + jne 1f
> + cmpl %eax, (%esi)
> + jne 1f
> +

So we dont have irq fro this cpu, ok.

And other cpus allowed, and xchg implies lock prefix, ok.


> + xchg (%esi), %ebx
> + xchg 4(%esi), %ecx
How this sequence is guaranteed to be atomic with other cpus ?

If it is a !SMP implementation, then you could replace xchg by mov instructions.

mov %ebx,(%esi)
mov %ecx,4(%esi)

> + mov %ebx, %eax
> + mov %ecx, %edx
and avoid these of course


> +
> +2:
> + /* restore interrupts */
> + push %edi
> + popf
> +
> + pop %ecx
> + pop %ebx
> + pop %edi
> + ret
> +1:
> + mov (%esi), %eax
> + mov 4(%esi), %edx
> + jmp 2b
> + CFI_ENDPROC
> +ENDPROC(cmpxchg8b_emu)
> +


So I suggest :


ENTRY(cmpxchg8b_emu)
CFI_STARTPROC

/* disable interrupts */
pushf
cli

cmpl %eax,(%esi)
jne 1f
cmpl %edx,4(%esi)
jne 2f

mov %ebx,(%esi)
mov %ecx,4(%esi)

/* restore interrupts */
popf
ret
1:
mov (%esi), %eax
2:
mov 4(%esi), %edx
popf
ret
CFI_ENDPROC
ENDPROC(cmpxchg8b_emu)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Arjan van de Ven on
On Wed, 30 Sep 2009 17:27:05 +0200
Eric Dumazet <eric.dumazet(a)gmail.com> wrote:
>
> > + pop %edi
> Why do you pop flags in edi, to later re-push them ?
>
> > + cli

because here I disable interrupts

(basically this is local_irq_save() )
>
>
> > + xchg (%esi), %ebx
> > + xchg 4(%esi), %ecx
> How this sequence is guaranteed to be atomic with other cpus ?

it is not. this is the 486 implementation which is !SMP
(just like the current cmpxchg64() fallback)

>
> If it is a !SMP implementation, then you could replace xchg by mov
> instructions.

that is not equivalent. I need to also store the old values
and return them....


>
> So I suggest :
>
>
> ENTRY(cmpxchg8b_emu)
> CFI_STARTPROC
>
> /* disable interrupts */
> pushf
> cli
>
> cmpl %eax,(%esi)
> jne 1f
> cmpl %edx,4(%esi)
> jne 2f
>
> mov %ebx,(%esi)
> mov %ecx,4(%esi)

this is not equivalent since you don't return the "prev" value.



--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Eric Dumazet on
Arjan van de Ven a �crit :
> On Tue, 29 Sep 2009 14:56:28 -0700 (PDT)
> Linus Torvalds <torvalds(a)linux-foundation.org> wrote:
>
>>
>> On Tue, 29 Sep 2009, Arjan van de Ven wrote:
>>> can't we use alternatives() for this, to patch cmpxchg64 in ?
>>> I mean.. it'll be commonly supported nowadays.. the fallback to it
>>> not being supported could be a bit slower by now...
>> Yes, we could. It would limit us to some fixed address format,
>> probably
>>
>> cmpxchg8b (%esi)
>>
>> or something. Use something like this as a starting point, perhaps?
>>
>> NOTE! Totally untested! And you'd actually need to implement the
>> "cmpxchg8b_emu" function that takes it's arguments in %eax:%edx,
>> %ebx:%ecx and %esi and doesn't trash any other registers..
>
> so I debugged this guy (had a few bugs ;-)
>
> patch, including a new cmpxchg8b_emu below:
>
>>From 5a76986c5dd272ea16a9b8abb7349ff3d6791c2b Mon Sep 17 00:00:00 2001
> From: Arjan van de Ven <arjan(a)linux.intel.com>
> Date: Wed, 30 Sep 2009 17:04:35 +0200
> Subject: [PATCH] x86: Provide an alternative() based cmpxchg64()
>
> Based on Linus' patch, this patch provides cmpxchg64() using
> the alternative() infrastructure.
>
> Note: the fallback is NOT smp safe, just like the current fallback
> is not SMP safe.
>
> Signed-off-by: Arjan van de Ven <arjan(a)linux.intel.com>

> +#define cmpxchg64(ptr, o, n) \
> +({ \
> + __typeof__(*(ptr)) __ret; \
> + __typeof__(*(ptr)) __old = (o); \
> + __typeof__(*(ptr)) __new = (n); \
> + alternative_io("call cmpxchg8b_emu", \
> + "lock; cmpxchg8b (%%esi)" , \
> + X86_FEATURE_CX8, \
> + "=A" (__ret), \
> + "S" ((ptr)), "0" (__old), \
> + "b" ((unsigned int)__new), \
> + "c" ((unsigned int)(__new>>32))); \


Note:

lock; cmpxchg8b (%%esi)

gives 4 bytes opcode : f0 0f c7 0e
Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added.

Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a litle bit better ?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Linus Torvalds on


On Wed, 30 Sep 2009, Eric Dumazet wrote:
>
> lock; cmpxchg8b (%%esi)
>
> gives 4 bytes opcode : f0 0f c7 0e
> Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added.
>
> Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a litle bit better ?

And if you want to be really clever, you would want to handle the non-SMP
case too, where you have just "cmpxchgb (%%esi)" (three bytes) without the
lock prefix.

However, at this point I think Arjan's patch is already way superior to
what we have now (feel free to take a look at what we generate on 32-bit
without PAE today - just have a barf-bag handy), so all I'd really want is
a few "tested-by"s to make me feel happier about it, and a few more people
looking at the emulation routine to all say "ok, looks sane, ACK".

And at that point we can then either make "cmpxchg()" just do the 8-byte
case natively, or just take your patch to change sched_clock.c to now use
the no-longer-entirely-disgusting cmpxchg64().

Ingo - I suspect both those patches should just go through you. You do
both x86 and scheduler, so I'll happily pull the end result.

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Cyrill Gorcunov on
[Eric Dumazet - Wed, Sep 30, 2009 at 05:57:25PM +0200]
....
|
| > +#define cmpxchg64(ptr, o, n) \
| > +({ \
| > + __typeof__(*(ptr)) __ret; \
| > + __typeof__(*(ptr)) __old = (o); \
| > + __typeof__(*(ptr)) __new = (n); \
| > + alternative_io("call cmpxchg8b_emu", \
| > + "lock; cmpxchg8b (%%esi)" , \
| > + X86_FEATURE_CX8, \
| > + "=A" (__ret), \
| > + "S" ((ptr)), "0" (__old), \
| > + "b" ((unsigned int)__new), \
| > + "c" ((unsigned int)(__new>>32))); \
|
|
| Note:
|
| lock; cmpxchg8b (%%esi)
|
| gives 4 bytes opcode : f0 0f c7 0e
| Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added.
|
| Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a litle bit better ?
|

Just curious why not "nop; lock; cmpxchg8b (%esi)"? lock itself is destructive
instruction with causes write buffers to flush data back and NOP itself will
be discarded by cpu internals so I suppose this form should be better. Though
I could miss something, and OTOH it's not a big deal. But still curious :)

-- Cyrill
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/