From: Cliff Wickman on

kdump is failing on an SGI UV system because it depends on
/sys/devices/system/cpu/cpuN/crash_notes, and these files contain
bad addresses for cpus beyond cpu 0.

This occurs using 2.6.35-rc3 code, but the same problem appears to
be present in 2.6.33.

The problem traces to per_cpu_ptr_to_phys() -> pcpu_addr_to_page() ->
vmalloc_to_page() being called for per-cpu addresses that are not in the
first per-cpu 'chunk' but are also not in the VMALLOC_START/VMALLOC_END range.

I wonder why this shows up on UV but not other x86_64's?
I've included a patch that solves this for me. But I defer to the
authors for a proper solution.

This is where per_cpu_ptr_to_phys() is called for this /sys file:
static ssize_t show_crash_notes(struct sys_device *dev, struct ...
....
addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));
rc = sprintf(buf, "%Lx\n", addr);
return rc;
}

The problem, without the below patch: (but with a couple of printk's)

uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu0/crash_notes
1c1b040
uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu1/crash_notes
db74000000000000
uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu2/crash_notes
db74000000000000
uv3-sys:/tmp/cpw # dmesg | tail -n 6
[ 133.883009] cpw: per_cpu_ptr_to_phys addr ffff880001c1b040
[ 133.883012] cpw: per_cpu_ptr_to_phys returning 0x1c1b040
[ 136.910178] cpw: per_cpu_ptr_to_phys addr ffff880001c3b040
[ 136.910181] cpw: per_cpu_ptr_to_phys returning 0xdb74000000000000
[ 140.304825] cpw: per_cpu_ptr_to_phys addr ffff880001c5b040
[ 140.304828] cpw: per_cpu_ptr_to_phys returning 0xdb74000000000000


With the below patch: (plus a couple of printk's)

uv3-sys: # cat /sys/devices/system/cpu/cpu0/crash_notes
1c1b040
uv3-sys: # cat /sys/devices/system/cpu/cpu1/crash_notes
1c3b040
uv3-sys: # cat /sys/devices/system/cpu/cpu2/crash_notes
1c5b040
uv3-sys: # dmesg | tail -n 6
[ 130.411358] cpw: per_cpu_ptr_to_phys addr ffff880001c1b040
[ 130.411361] cpw: per_cpu_ptr_to_phys returning 0x1c1b040
[ 135.420702] cpw: per_cpu_ptr_to_phys addr ffff880001c3b040
[ 135.420705] cpw: per_cpu_ptr_to_phys returning 0x1c3b040
[ 139.514014] cpw: per_cpu_ptr_to_phys addr ffff880001c5b040
[ 139.514016] cpw: per_cpu_ptr_to_phys returning 0x1c5b040

Diffed against 2.6.35-rc3

Signed-off-by: Cliff Wickman <cpw(a)sgi.com>
---
mm/percpu.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

Index: linux-2.6.35-rc3/mm/percpu.c
===================================================================
--- linux-2.6.35-rc3.orig/mm/percpu.c
+++ linux-2.6.35-rc3/mm/percpu.c
@@ -978,12 +978,11 @@ bool is_kernel_percpu_address(unsigned l
*/
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
+ if ((unsigned long)addr < VMALLOC_START ||
+ (unsigned long)addr >= VMALLOC_END)
+ return __pa(addr);
if (pcpu_addr_in_first_chunk(addr)) {
- if ((unsigned long)addr < VMALLOC_START ||
- (unsigned long)addr >= VMALLOC_END)
- return __pa(addr);
- else
- return page_to_phys(vmalloc_to_page(addr));
+ return page_to_phys(vmalloc_to_page(addr));
} else
return page_to_phys(pcpu_addr_to_page(addr));
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Tejun Heo on
Hello,

On 06/17/2010 06:20 PM, Cliff Wickman wrote:
> phys_addr_t per_cpu_ptr_to_phys(void *addr)
> {
> + if ((unsigned long)addr < VMALLOC_START ||
> + (unsigned long)addr >= VMALLOC_END)
> + return __pa(addr);
> if (pcpu_addr_in_first_chunk(addr)) {
> - if ((unsigned long)addr < VMALLOC_START ||
> - (unsigned long)addr >= VMALLOC_END)
> - return __pa(addr);
> - else
> - return page_to_phys(vmalloc_to_page(addr));
> + return page_to_phys(vmalloc_to_page(addr));
> } else
> return page_to_phys(pcpu_addr_to_page(addr));
> }

(scratching head...) So, that means it's given an address for which
!pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange.
I'll find out what's going on.

Thanks.

--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Tejun Heo on
On 06/17/2010 07:08 PM, Tejun Heo wrote:
> (scratching head...) So, that means it's given an address for which
> !pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange.
> I'll find out what's going on.

Does the following patch work? The original patch assumed that @addr
would be the address of the base cpu which isn't true. I only compile
tested the patch so it might be broken (sorry, I gotta go somewhere
now) but this should be the right direction.

Thanks.

diff --git a/mm/percpu.c b/mm/percpu.c
index 46485e1..8956155 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -978,14 +978,23 @@ bool is_kernel_percpu_address(unsigned long addr)
*/
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
- if (pcpu_addr_in_first_chunk(addr)) {
- if ((unsigned long)addr < VMALLOC_START ||
- (unsigned long)addr >= VMALLOC_END)
- return __pa(addr);
- else
- return page_to_phys(vmalloc_to_page(addr));
- } else
- return page_to_phys(pcpu_addr_to_page(addr));
+ void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ void *start = per_cpu_ptr(base, cpu);
+
+ if (addr >= start && addr < start + pcpu_unit_size) {
+ /* in the first chunk */
+ if ((unsigned long)addr < VMALLOC_START ||
+ (unsigned long)addr >= VMALLOC_END)
+ return __pa(addr);
+ else
+ return page_to_phys(vmalloc_to_page(addr));
+ }
+ }
+ /* in one of the other chunks */
+ return page_to_phys(pcpu_addr_to_page(addr));
}

static inline size_t pcpu_calc_fc_sizes(size_t static_size,


--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Cliff Wickman on

On Thu, Jun 17, 2010 at 07:35:16PM +0200, Tejun Heo wrote:
> On 06/17/2010 07:08 PM, Tejun Heo wrote:
> > (scratching head...) So, that means it's given an address for which
> > !pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange.
> > I'll find out what's going on.
>
> Does the following patch work? The original patch assumed that @addr
> would be the address of the base cpu which isn't true. I only compile
> tested the patch so it might be broken (sorry, I gotta go somewhere
> now) but this should be the right direction.

Yes, your patch works. I tested it on a 32p UV system.

-Cliff

> diff --git a/mm/percpu.c b/mm/percpu.c
> index 46485e1..8956155 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -978,14 +978,23 @@ bool is_kernel_percpu_address(unsigned long addr)
> */
> phys_addr_t per_cpu_ptr_to_phys(void *addr)
> {
> - if (pcpu_addr_in_first_chunk(addr)) {
> - if ((unsigned long)addr < VMALLOC_START ||
> - (unsigned long)addr >= VMALLOC_END)
> - return __pa(addr);
> - else
> - return page_to_phys(vmalloc_to_page(addr));
> - } else
> - return page_to_phys(pcpu_addr_to_page(addr));
> + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
> + unsigned int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + void *start = per_cpu_ptr(base, cpu);
> +
> + if (addr >= start && addr < start + pcpu_unit_size) {
> + /* in the first chunk */
> + if ((unsigned long)addr < VMALLOC_START ||
> + (unsigned long)addr >= VMALLOC_END)
> + return __pa(addr);
> + else
> + return page_to_phys(vmalloc_to_page(addr));
> + }
> + }
> + /* in one of the other chunks */
> + return page_to_phys(pcpu_addr_to_page(addr));
> }
>
> static inline size_t pcpu_calc_fc_sizes(size_t static_size,
>
>
> --
> tejun

--
Cliff Wickman
SGI
cpw(a)sgi.com
(651) 683-3824
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/