From: Anton Blanchard <anton@samba.org>

Distribute boot time memory allocations across all nodes, from Manfred
Spraul.

We want to spread memory across nodes to avoid all allocations ending
up on node 0.

Spreading boot time allocations around also helps us to avoid node 0
becoming the hot node.

I took it for a spin:

buddyinfo before:
Node 7, 0    2    1    1    0    2    1    2    1    2    1    2    741
Node 6, 0    0    0    2    0    2    1    1    2    2    2    2   1002
Node 5, 0    0    0    2    0    2    1    2    1    2    2    2   2006
Node 4, 0    0    0    2    0    2    1    2    1    2    2    2   2006
Node 3, 0    0    0    2    0    2    1    2    1    2    2    2   2006
Node 2, 0    0    0    2    0    2    1    2    1    2    2    2   2006
Node 1, 0    0    0    2    0    2    1    1    2    2    2    2   1002
Node 0, 0    0   38    7    0    1    1    1    0    0    0    0   1998

buddyinfo after:
Node 7, 0    1    0    1    1    1    1    0    0    0    1    2    738
Node 6, 0    1    0    1    1    1    0    1    0    0    2    2   1002
Node 5, 0    0    0    1    1    1    1    0    0    0    2    2   2006
Node 4, 0    1    0    1    0    1    1    0    0    0    2    2   2006
Node 3, 0    0    0    1    0    1    1    0    0    0    2    2   2005
Node 2, 0    1    0    0    0    0    0    1    0    0    2    2   2006
Node 1, 0    2    1    1    0    1    1    1    0    0    2    2   1002
Node 0, 0   20   45    8    3    0    1    1    1    1    0    1   2004

Change in free memory due to patch:

Node 7 -54.08 MB
Node 6  -6.33 MB
Node 5  -6.09 MB
Node 4  -6.14 MB
Node 3 -22.15 MB
Node 2  -6.05 MB
Node 1  -6.12 MB
Node 0 107.35 MB

As you can see, we gained over 100MB of free memory on node 0.



---

 25-akpm/mm/page_alloc.c |   40 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 40 insertions(+)

diff -puN mm/page_alloc.c~distribute-early-allocations-across-nodes mm/page_alloc.c
--- 25/mm/page_alloc.c~distribute-early-allocations-across-nodes	Tue Mar  9 13:18:00 2004
+++ 25-akpm/mm/page_alloc.c	Tue Mar  9 13:18:00 2004
@@ -690,6 +690,42 @@ got_pg:
 
 EXPORT_SYMBOL(__alloc_pages);
 
+#ifdef CONFIG_NUMA
+/*
+ * Early boot: everything is done by one cpu, but the data structures will be
+ * used by all cpus - spread them round-robin over all nodes with free memory.
+ * Returns the address of the allocated pages, or 0 on failure.
+ */
+static __init unsigned long get_boot_pages(unsigned int gfp_mask, unsigned int order)
+{
+	static int nodenr;	/* node to try first on the next call */
+	struct page *page;
+	int nid = 0;
+	int i;
+
+	/* Probe every node exactly once, starting at nodenr. */
+	for (i = 0; i < numnodes; i++) {
+		struct zone **z;
+
+		nid = (nodenr + i) % numnodes;
+		if (!node_present_pages(nid))
+			continue;
+		/* The node has memory: is enough of it in the wanted zonelist? */
+		z = NODE_DATA(nid)->node_zonelists[gfp_mask & GFP_ZONEMASK].zones;
+		for (; *z; z++)
+			if ((*z)->free_pages > (1UL << order))
+				goto found_node;
+	}
+	return 0;
+found_node:
+	nodenr = (nid + 1) % numnodes;	/* keep the cursor bounded */
+	page = alloc_pages_node(nid, gfp_mask, order);
+	if (!page)
+		return 0;
+	return (unsigned long) page_address(page);
+}
+#endif
+
 /*
  * Common helper functions.
  */
@@ -697,6 +733,10 @@ fastcall unsigned long __get_free_pages(
 {
 	struct page * page;
 
+#ifdef CONFIG_NUMA
+	if (unlikely(!system_running))
+		return get_boot_pages(gfp_mask, order);
+#endif
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;

_