From: Greg KH on
2.6.34-stable review patch. If anyone has any objections, please let us know.

------------------

From: Yinghai Lu <yinghai(a)kernel.org>

commit b8ab9f82025adea77864115da73e70026fa4f540 upstream.

Borislav Petkov reported his 32bit numa system has problem:

[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
....
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb

looks like it allocates too much high address for bootmem.

Try to cut limit with get_max_mapped()

Reported-by: Borislav Petkov <borislav.petkov(a)amd.com>
Tested-by: Conny Seidel <conny.seidel(a)amd.com>
Signed-off-by: Yinghai Lu <yinghai(a)kernel.org>
Cc: Ingo Molnar <mingo(a)elte.hu>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Lee Schermerhorn <lee.schermerhorn(a)hp.com>
Cc: Mel Gorman <mel(a)csn.ul.ie>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)suse.de>

---
mm/bootmem.c | 24 ++++++++++++++++++++----
mm/page_alloc.c | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)

--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_no
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}

void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsign
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3415,6 +3415,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;

+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/