---

 25-akpm/include/linux/mmzone.h |   12 +++++++--
 25-akpm/mm/page_alloc.c        |   52 ++++++++++++++++++++++++++++-------------
 kernel/sysctl.c                |    0 
 3 files changed, 45 insertions(+), 19 deletions(-)

diff -puN include/linux/mmzone.h~lower-zone-protection-numa-fix-tickle include/linux/mmzone.h
--- 25/include/linux/mmzone.h~lower-zone-protection-numa-fix-tickle	2004-03-19 10:33:36.914803504 -0800
+++ 25-akpm/include/linux/mmzone.h	2004-03-19 10:33:36.919802744 -0800
@@ -79,11 +79,14 @@ struct zone {
 	spinlock_t		lock;
 	unsigned long		free_pages;
 	unsigned long		pages_min, pages_low, pages_high;
-	/* protection a is pre-calculated number of extra pages that must be
+	/*
+	 * protection[] is a pre-calculated number of extra pages that must be
 	 * available in a zone in order for __alloc_pages() to allocate memory
 	 * from the zone. i.e., for a GFP_KERNEL alloc of "order" there must
-	 * be "(1<<order) + protection[GFP_KERNEL]" free pages in the zone.
-	 * It considers both min_free_kbytes and sysctl_lower_zone_protection.
+	 * be "(1<<order) + protection[ZONE_NORMAL]" free pages in the zone
+	 * for us to choose to allocate the page from that zone.
+	 *
+	 * It uses both min_free_kbytes and sysctl_lower_zone_protection.
 	 * The protection values are recalculated if either of these values
 	 * change.  The array elements are in zonelist order:
 	 *	[0] == GFP_DMA, [1] == GFP_KERNEL, [2] == GFP_HIGHMEM.
@@ -239,6 +242,9 @@ void get_zone_counts(unsigned long *acti
 void build_all_zonelists(void);
 void wakeup_kswapd(struct zone *zone);
 
+/*
+ * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
+ */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
 /**
diff -puN kernel/sysctl.c~lower-zone-protection-numa-fix-tickle kernel/sysctl.c
diff -puN mm/page_alloc.c~lower-zone-protection-numa-fix-tickle mm/page_alloc.c
--- 25/mm/page_alloc.c~lower-zone-protection-numa-fix-tickle	2004-03-19 10:33:36.917803048 -0800
+++ 25-akpm/mm/page_alloc.c	2004-03-19 10:33:36.921802440 -0800
@@ -1050,6 +1050,8 @@ void show_free_areas(void)
 		nr_free_pages());
 
 	for_each_zone(zone) {
+		int i;
+
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -1069,6 +1071,10 @@ void show_free_areas(void)
 			K(zone->nr_inactive),
 			K(zone->present_pages)
 			);
+		printk("protection[]:");
+		for (i = 0; i < MAX_NR_ZONES; i++)
+			printk(" %lu", zone->protection[i]);
+		printk("\n");
 	}
 
 	for_each_zone(zone) {
@@ -1266,7 +1272,7 @@ static void __init build_zonelists(pg_da
  			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
  
 		zonelist->zones[j++] = NULL;
-       }
+	}
 }
 
 #endif	/* CONFIG_NUMA */
@@ -1738,7 +1744,8 @@ void __init page_alloc_init(void)
 	hotcpu_notifier(page_alloc_cpu_notify, 0);
 }
 
-unsigned long higherzone_val(struct zone *z, int max_zone, int alloc_type)
+static unsigned long higherzone_val(struct zone *z, int max_zone,
+					int alloc_type)
 {
 	int z_idx = zone_idx(z);
 	struct zone *higherzone;
@@ -1749,9 +1756,12 @@ unsigned long higherzone_val(struct zone
 		return 0;
 
 	higherzone = &z->zone_pgdat->node_zones[z_idx+1];
+
 	/* We always start with the higher zone's protection value */
 	pages = higherzone->protection[alloc_type];
-	/* We get a lower-zone-protection contribution only if there are
+
+	/*
+	 * We get a lower-zone-protection contribution only if there are
 	 * pages in the higher zone and if we're not the highest zone
 	 * in the current zonelist.  e.g., never happens for GFP_DMA. Happens
 	 * only for ZONE_DMA in a GFP_KERNEL allocation and happens for ZONE_DMA
@@ -1786,27 +1796,36 @@ static void setup_per_zone_protection(vo
 			if (zones[i].present_pages)
 				max_zone = i;
 
-		/* For each of the different allocation types:
-		 * GFP_DMA -> GFP_KERNEL -> GFP_HIGHMEM */
+		/*
+		 * For each of the different allocation types:
+		 * GFP_DMA -> GFP_KERNEL -> GFP_HIGHMEM
+		 */
 		for (i = 0; i < MAX_NR_ZONES; i++) {
-			/* For each of the zones:
-			 * ZONE_HIGHMEM -> ZONE_NORMAL -> ZONE_DMA */
+			/*
+			 * For each of the zones:
+			 * ZONE_HIGHMEM -> ZONE_NORMAL -> ZONE_DMA
+			 */
 			for (j = MAX_NR_ZONES-1; j >= 0; j--) {
 				zone = &zones[j];
 
-				/* We never protect zones that don't have memory in them
-				 * (j>max_zone) or zones that aren't in the zonelists
-				 * for a certain type of allocation (j>i).  We have to
-				 * assign these to zero because the lower zones take
+				/*
+				 * We never protect zones that don't have memory
+				 * in them (j>max_zone) or zones that aren't in
+				 * the zonelists for a certain type of
+				 * allocation (j>i).  We have to assign these to
+				 * zero because the lower zones take
 				 * contributions from the higher zones.
 				 */
 				if (j > max_zone || j > i) {
 					zone->protection[i] = 0;
-				} else {
-					/* The contribution of the next higher zone */
-					zone->protection[i] = higherzone_val(zone, max_zone, i);
-					zone->protection[i] += zone->pages_low;
+					continue;
 				}
+				/*
+				 * The contribution of the next higher zone
+				 */
+				zone->protection[i] = higherzone_val(zone,
+								max_zone, i);
+				zone->protection[i] += zone->pages_low;
 			}
 		}
 	}
@@ -1825,9 +1844,10 @@ static void setup_per_zone_pages_min(voi
 	unsigned long flags;
 
 	/* Calculate total number of !ZONE_HIGHMEM pages */
-	for_each_zone(zone)
+	for_each_zone(zone) {
 		if (!is_highmem(zone))
 			lowmem_pages += zone->present_pages;
+	}
 
 	for_each_zone(zone) {
 		spin_lock_irqsave(&zone->lru_lock, flags);

_