From: Dave Hansen <haveblue@us.ibm.com>


The following four patches provide the last needed changes before the
introduction of sparsemem.  For a more complete description of what this will
do, please see this patch:

 http://www.sr71.net/patches/2.6.11/2.6.11-bk7-mhp1/broken-out/B-sparse-150-sparsemem.patch

or previous posts on the subject:
 http://marc.theaimsgroup.com/?t=110868540700001&r=1&w=2
 http://marc.theaimsgroup.com/?l=linux-mm&m=109897373315016&w=2

Three of these are i386-only, but one of them reorganizes the macros used to
manage the space in page->flags, and will affect all platforms.  There are
analogous patches to the i386 ones for ppc64, ia64, and x86_64, but those will
be submitted by the normal arch maintainers.

The combination of the four patches has been test-booted on a variety of i386
hardware, and compiled for ppc64, i386, and x86_64 with about 17 different
.configs.  It's also been runtime-tested on ia64 configs (with more patches on
top).


This patch:

discontig.c has some assumptions that mem_map[]s inside of a node are
contiguous.  Teach it to make sure that each region that it's bringing online
is actually made up of valid ranges of RAM.

Written-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/mm/discontig.c |   14 ++++++++++++++
 25-akpm/arch/i386/mm/init.c      |    2 +-
 25-akpm/include/asm-i386/page.h  |    2 ++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff -puN arch/i386/mm/discontig.c~sparsemem-base-teach-discontig-about-sparse-ranges arch/i386/mm/discontig.c
--- 25/arch/i386/mm/discontig.c~sparsemem-base-teach-discontig-about-sparse-ranges	2005-03-14 18:22:45.000000000 -0800
+++ 25-akpm/arch/i386/mm/discontig.c	2005-03-14 18:22:52.000000000 -0800
@@ -185,6 +185,7 @@ static unsigned long calculate_numa_rema
 {
 	int nid;
 	unsigned long size, reserve_pages = 0;
+	unsigned long pfn;
 
 	for_each_online_node(nid) {
 		if (nid == 0)
@@ -208,6 +209,19 @@ static unsigned long calculate_numa_rema
 		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
 		/* now the roundup is correct, convert to PAGE_SIZE pages */
 		size = size * PTRS_PER_PTE;
+
+		/*
+		 * Validate the region we are allocating only contains valid
+		 * pages.
+		 */
+		for (pfn = node_end_pfn[nid] - size;
+		     pfn < node_end_pfn[nid]; pfn++)
+			if (!page_is_ram(pfn))
+				break;
+
+		if (pfn != node_end_pfn[nid])
+			size = 0;
+
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
diff -puN arch/i386/mm/init.c~sparsemem-base-teach-discontig-about-sparse-ranges arch/i386/mm/init.c
--- 25/arch/i386/mm/init.c~sparsemem-base-teach-discontig-about-sparse-ranges	2005-03-14 18:22:45.000000000 -0800
+++ 25-akpm/arch/i386/mm/init.c	2005-03-14 18:22:45.000000000 -0800
@@ -191,7 +191,7 @@ static inline int page_kills_ppro(unsign
 
 extern int is_available_memory(efi_memory_desc_t *);
 
-static inline int page_is_ram(unsigned long pagenr)
+int page_is_ram(unsigned long pagenr)
 {
 	int i;
 	unsigned long addr, end;
diff -puN include/asm-i386/page.h~sparsemem-base-teach-discontig-about-sparse-ranges include/asm-i386/page.h
--- 25/include/asm-i386/page.h~sparsemem-base-teach-discontig-about-sparse-ranges	2005-03-14 18:22:45.000000000 -0800
+++ 25-akpm/include/asm-i386/page.h	2005-03-14 18:22:45.000000000 -0800
@@ -120,6 +120,8 @@ static __inline__ int get_order(unsigned
 
 extern int sysctl_legacy_va_layout;
 
+extern int page_is_ram(unsigned long pagenr);
+
 #endif /* __ASSEMBLY__ */
 
 #ifdef __ASSEMBLY__
_