Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/x86_64/kernel/setup.c   |   54 ++++++++++++++++++++++++++++++++-----------
 arch/x86_64/mm/k8topology.c  |   13 +++++++---
 arch/x86_64/mm/numa.c        |    4 +--
 include/asm-x86_64/apicdef.h |    1 
 4 files changed, 54 insertions(+), 18 deletions(-)

diff -puN arch/x86_64/kernel/setup.c~x86_64-numa-k8-nodeid arch/x86_64/kernel/setup.c
--- devel/arch/x86_64/kernel/setup.c~x86_64-numa-k8-nodeid	2005-09-07 19:46:44.000000000 -0700
+++ devel-akpm/arch/x86_64/kernel/setup.c	2005-09-07 19:46:44.000000000 -0700
@@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(
 	}
 }
 
+#ifdef CONFIG_NUMA
+static int nearby_node(int apicid) 
+{	/* Return an online node that apicid_to_node[] maps another APIC ID to. */
+	int i;	/* APIC ID currently being probed */
+	for (i = apicid - 1; i >= 0; i--) {	/* lower APIC IDs first, walking downwards */
+		int node = apicid_to_node[i];
+		if (node != NUMA_NO_NODE && node_online(node))
+			return node;
+	}
+	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {	/* then higher APIC IDs, walking upwards */
+		int node = apicid_to_node[i];
+		if (node != NUMA_NO_NODE && node_online(node))
+			return node;
+	}
+	return first_node(node_online_map); /* Shouldn't happen: no other APIC ID maps to an online node */
+}
+#endif
+
 /*
  * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
  * Assumes number of cores is a power of two.
@@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct
 {
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
-	int node = 0;
 	unsigned bits;
+#ifdef CONFIG_NUMA
+	int node = 0;
 	unsigned apicid = phys_proc_id[cpu];
+#endif
 
 	bits = 0;
 	while ((1 << bits) < c->x86_num_cores)
@@ -777,25 +797,33 @@ static void __init amd_detect_cmp(struct
 	phys_proc_id[cpu] >>= bits;
 
 #ifdef CONFIG_NUMA
-	/* When an ACPI SRAT table is available use the mappings from SRAT
-	   instead. */
 	node = phys_proc_id[cpu];
-	if (acpi_numa > 0) {
-		if (apicid_to_node[apicid] != NUMA_NO_NODE)
-			node = apicid_to_node[apicid];
-		else
-			printk(KERN_ERR 
-			       "SRAT: Didn't specify node for CPU %d(%d)\n",
-			       cpu, apicid);
+	if (apicid_to_node[apicid] != NUMA_NO_NODE)
+		node = apicid_to_node[apicid];
+	if (!node_online(node)) { 
+		/* Two possibilities here:
+		   - The CPU has no memory, so no node was created for it.
+		     In that case, pick the node of a nearby CPU.
+		   - The APIC IDs differ from the HyperTransport node IDs
+		     that the K8 northbridge parsing fills in.
+		     Assume they are all shifted by a constant offset
+		     but are in the same order as the HT node IDs.
+		     If that does not yield a usable node, fall back to
+		     the nearby-CPU path of the first case.  */
+		int ht_nodeid = apicid - (phys_proc_id[0] << bits); 
+		if (ht_nodeid >= 0 && 
+		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = apicid_to_node[ht_nodeid]; 
+		/* Pick a nearby node */
+		if (!node_online(node))
+			node = nearby_node(apicid); 
 	}
-	if (!node_online(node))
-		node = first_node(node_online_map);
 	cpu_to_node[cpu] = node;
-#endif
 
 	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
 			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
 #endif
+#endif
 }
 
 static int __init init_amd(struct cpuinfo_x86 *c)
diff -puN arch/x86_64/mm/k8topology.c~x86_64-numa-k8-nodeid arch/x86_64/mm/k8topology.c
--- devel/arch/x86_64/mm/k8topology.c~x86_64-numa-k8-nodeid	2005-09-07 19:46:44.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/k8topology.c	2005-09-07 19:46:44.000000000 -0700
@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long s
 	unsigned long prevbase;
 	struct node nodes[8];
 	int nodeid, i, nb; 
+	unsigned char nodeids[8];
 	int found = 0;
 	u32 reg;
 	unsigned numnodes;
 	nodemask_t nodes_parsed;
+	unsigned dualcore = 0;
 
 	nodes_clear(nodes_parsed);
 
@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long s
 	prevbase = 0;
 	for (i = 0; i < 8; i++) { 
 		unsigned long base,limit; 
-
+		u32 nodeid;
+		
+		/* Bits 13:12 are undefined before E stepping, but hopefully read as 0 */
+		dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1; 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 
 		nodeid = limit & 7; 
+		nodeids[i] = nodeid;
 		if ((base & 3) == 0) { 
 			if (i < numnodes)
 				printk("Skipping disabled node %d\n", i); 
@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long s
 
 	for (i = 0; i < 8; i++) {
 		if (nodes[i].start != nodes[i].end) { 
-			/* assume 1:1 NODE:CPU */
-			cpu_to_node[i] = i; 
+			nodeid = nodeids[i];
+			apicid_to_node[nodeid << dualcore] = i; 
+			apicid_to_node[(nodeid << dualcore) + dualcore] = i; 
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end); 
 		} 
 	}
diff -puN arch/x86_64/mm/numa.c~x86_64-numa-k8-nodeid arch/x86_64/mm/numa.c
--- devel/arch/x86_64/mm/numa.c~x86_64-numa-k8-nodeid	2005-09-07 19:46:44.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/numa.c	2005-09-07 19:46:59.000000000 -0700
@@ -29,8 +29,8 @@ int memnode_shift;
 u8  memnodemap[NODEMAPSIZE];
 
 unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-unsigned char apicid_to_node[256] __cpuinitdata = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 cpumask_t     node_to_cpumask[MAX_NUMNODES] __read_mostly;
 
diff -puN include/asm-x86_64/apicdef.h~x86_64-numa-k8-nodeid include/asm-x86_64/apicdef.h
--- devel/include/asm-x86_64/apicdef.h~x86_64-numa-k8-nodeid	2005-09-07 19:46:44.000000000 -0700
+++ devel-akpm/include/asm-x86_64/apicdef.h	2005-09-07 19:46:44.000000000 -0700
@@ -113,6 +113,7 @@
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 
 #define MAX_IO_APICS 128
+#define MAX_LOCAL_APIC 256
 
 /*
  * All x86-64 systems are xAPIC compatible.
_