From: Yinghai Lu on
On 07/09/2010 12:52 PM, Conny Seidel wrote:
> Hi,
>
> On Fri, 9 Jul 2010 15:08:52 -0400
> Yinghai Lu <yinghai(a)kernel.org> wrote:
>
>> On 07/09/2010 07:54 AM, Borislav Petkov wrote:
>>> Hi,
>>>
>>> this is something we're getting during testing on one of our boxes
>>> here, a dual socket Magny-Cours machine. It is oopsing on the addr
>>> variable in __alloc_memory_core_early() after converting it to a
>>> virtual address in order to clear the mem region at it. I've removed
>>> the "#if 0.. #endif" around the printk which dumps the ranges in the
>>> early node map for more info.
>>>
>>> It's latest -git, 32bit build, config is attached.
>>>
>>
>> I cannot reproduce this on any of my setups.
>>
>> But looking at the code, there is a limit problem on 32-bit...
>>
>> Please check whether this fixes the problem.
>>
>> We will also need to rebase the early_res-to-memblock patchset.
>>
>> Thanks
>>
>> Yinghai Lu
>>
>
> I'm sorry but the patch isn't working.
>
> Now we are seeing this:
>
> [ 0.000000] BUG: Int 6: CR2 (null)
> [ 0.000000] EDI 00000005 ESI f3e00800 EBP c21afed4 ESP c21afe90
> [ 0.000000] EBX 01000000 EDX 00000006 ECX (null) EAX fffffff4
> [ 0.000000] err (null) EIP c2359558 CS 00000060 flg 00010046
> [ 0.000000] Stack: fffffff4 (null) 00000001 f3e00000 00000005 00080000 00000001 00000002
> [ 0.000000] 000b6800 00000002 f3e00000 00000001 00000002 c21aff0c c2359dd2 (null)
> [ 0.000000] ffe00000 00000004 (null) 00000004 001fafd8 c2943000 (null) 00004e00
> [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.35-rc4+ #1
> [ 0.000000] Call Trace:
> [ 0.000000] [<c1dd39d9>] hlt_loop+0x0/0x3
> [ 0.000000] [<c2359558>] ? free_area_init_node+0x5b1/0x647
> [ 0.000000] [<c2359dd2>] free_area_init_nodes+0x7b5/0x89a
> [ 0.000000] [<c234a2c9>] paging_init+0x1a9/0x1cc
> [ 0.000000] [<c2330325>] setup_arch+0x1414/0x15f7
> [ 0.000000] [<c23510c1>] ? __reserve_early+0x119/0x135
> [ 0.000000] [<c1078c8a>] ? printk+0x36/0x54
> [ 0.000000] [<c232917b>] start_kernel+0x1d0/0xb6f
> [ 0.000000] [<c2328163>] i386_start_kernel+0x163/0x179

Can you send the boot log from the start?

YH
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Yinghai Lu on
On 07/09/2010 01:42 PM, Conny Seidel wrote:
> sparse_early_mem_map_alloc: sparsemem memory map backing failed some memory will not be available.

please check

[PATCH -v2] x86,mm: fix 32bit numa sparsemem

Borislav Petkov <borislav.petkov(a)amd.com> reported that his 32-bit NUMA system has a problem:

[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
....
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb

It looks like it allocates an address that is much too high for bootmem.

Try to clamp the limit with get_max_mapped().

-v2: add a fallback for sparse_mem_map_populate(), to address
"sparse_early_mem_map_alloc: sparsemem memory map backing failed some memory will not be available."


Reported-by: Borislav Petkov <borislav.petkov(a)amd.com>
Signed-off-by: Yinghai Lu <yinghai(a)kernel.org>

---
mm/page_alloc.c | 3 +++
mm/sparse.c | 10 +++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;

+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -363,7 +363,10 @@ static void __init sparse_early_usemaps_
return;
}

- usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
+ usemap = __alloc_bootmem_node_nopanic(NODE_DATA(nodeid),
+ size * usemap_count,
+ SMP_CACHE_BYTES,
+ __pa(MAX_DMA_ADDRESS));
if (usemap) {
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
if (!present_section_nr(pnum))
@@ -391,6 +394,11 @@ struct page __init *sparse_mem_map_popul
size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+
+ if (!map)
+ map = __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+ PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+
return map;
}
void __init sparse_mem_maps_populate_node(struct page **map_map,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Yinghai Lu on
On 07/09/2010 04:51 PM, Conny Seidel wrote:
> On Fri, 9 Jul 2010 18:07:50 -0400
> Yinghai Lu <yinghai(a)kernel.org> wrote:
>
>> On 07/09/2010 01:42 PM, Conny Seidel wrote:
>>> sparse_early_mem_map_alloc: sparsemem memory map backing failed some
>>> memory will not be available.
>>
>> please check
>>
> This still panics, see logfile for complete trace.
>

This one should fix the problem.

Thanks

Yinghai

[PATCH -v3] x86,nobootmem: make alloc_bootmem_node fall back to other node when 32bit numa is used

Borislav Petkov <borislav.petkov(a)amd.com> reported that his 32-bit NUMA system has a problem:

[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
....
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb

It looks like it allocates an address that is much too high for bootmem.

Try to clamp the limit with get_max_mapped().

-v3: make alloc_bootmem_node fall back to other nodes,
just like the old alloc_bootmem_node did.

This patch is needed for 2.6.34 and 2.6.35.

Reported-by: Borislav Petkov <borislav.petkov(a)amd.com>
Signed-off-by: Yinghai Lu <yinghai(a)kernel.org>
Cc: stable(a)kernel.org

---
mm/bootmem.c | 24 ++++++++++++++++++++----
mm/page_alloc.c | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;

+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_no
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}

void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsign
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/