From: Andy Whitcroft <apw@shadowen.org>

For each node there is a defined list of MAX_NR_ZONES zones.  These are
selected as a result of the __GFP_DMA and __GFP_HIGHMEM zone modifier flags
being passed to the memory allocator as part of the GFP mask.  Each node
has a set of zone lists, node_zonelists, which defines the list and order
of zones to scan for each flag combination.  When initialising these lists
we iterate over modifier combinations 0 ..  MAX_NR_ZONES.  However, this is
only correct when there are at most ZONES_SHIFT flags.  If another flag is
introduced, zonelists for it would not be initialised.

This patch introduces GFP_ZONETYPES (based on GFP_ZONEMASK) as a bound for
the number of modifier combinations.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/include/linux/mmzone.h |   29 ++++++++++++++++++++++++++++-
 25-akpm/mm/page_alloc.c        |    8 ++++----
 2 files changed, 32 insertions(+), 5 deletions(-)

diff -puN include/linux/mmzone.h~fix-gfp-zone-modifier-interators include/linux/mmzone.h
--- 25/include/linux/mmzone.h~fix-gfp-zone-modifier-interators	Fri Jun 25 14:35:51 2004
+++ 25-akpm/include/linux/mmzone.h	Fri Jun 25 14:35:51 2004
@@ -81,7 +81,34 @@ struct per_cpu_pageset {
 #define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
 #define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
+
+/*
+ * When a memory allocation must conform to specific limitations (such
+ * as being suitable for DMA) the caller will pass in hints to the
+ * allocator in the gfp_mask, in the zone modifier bits.  These bits
+ * are used to select a priority ordered list of memory zones which
+ * match the requested limits.  GFP_ZONEMASK defines which bits within
+ * the gfp_mask should be considered as zone modifiers.  Each valid
+ * combination of the zone modifier bits has a corresponding list
+ * of zones (in node_zonelists).  Thus for two zone modifiers there
+ * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
+ * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
+ * combinations of zone modifiers in "zone modifier space".
+ */
 #define GFP_ZONEMASK	0x03
+/*
+ * As an optimisation any zone modifier bits which are only valid when
+ * no other zone modifier bits are set (loners) should be placed in
+ * the highest order bits of this field.  This allows us to reduce the
+ * extent of the zonelists thus saving space.  For example in the case
+ * of three zone modifier bits, we could require up to eight zonelists.
+ * If the leftmost zone modifier is a "loner" then the highest valid
+ * zonelist index would be four, allowing us to allocate only five
+ * zonelists.  Use the first form when the leftmost bit is not a "loner",
+ * otherwise use the second.
+ */
+/* #define GFP_ZONETYPES	(GFP_ZONEMASK + 1) */		/* Non-loner */
+#define GFP_ZONETYPES	((GFP_ZONEMASK + 1) / 2 + 1)		/* Loner */
 
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
@@ -237,7 +264,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[MAX_NR_ZONES];
+	struct zonelist node_zonelists[GFP_ZONETYPES];
 	int nr_zones;
 	struct page *node_mem_map;
 	struct bootmem_data *bdata;
diff -puN mm/page_alloc.c~fix-gfp-zone-modifier-interators mm/page_alloc.c
--- 25/mm/page_alloc.c~fix-gfp-zone-modifier-interators	Fri Jun 25 14:35:51 2004
+++ 25-akpm/mm/page_alloc.c	Fri Jun 25 14:35:51 2004
@@ -1235,7 +1235,7 @@ static void __init build_zonelists(pg_da
 	DECLARE_BITMAP(used_mask, MAX_NUMNODES);
 
 	/* initialize zonelists */
-	for (i = 0; i < MAX_NR_ZONES; i++) {
+	for (i = 0; i < GFP_ZONETYPES; i++) {
 		zonelist = pgdat->node_zonelists + i;
 		memset(zonelist, 0, sizeof(*zonelist));
 		zonelist->zones[0] = NULL;
@@ -1257,7 +1257,7 @@ static void __init build_zonelists(pg_da
 			node_load[node] += load;
 		prev_node = node;
 		load--;
-		for (i = 0; i < MAX_NR_ZONES; i++) {
+		for (i = 0; i < GFP_ZONETYPES; i++) {
 			zonelist = pgdat->node_zonelists + i;
 			for (j = 0; zonelist->zones[j] != NULL; j++);
 
@@ -1280,7 +1280,7 @@ static void __init build_zonelists(pg_da
 	int i, j, k, node, local_node;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
+	for (i = 0; i < GFP_ZONETYPES; i++) {
 		struct zonelist *zonelist;
 
 		zonelist = pgdat->node_zonelists + i;
@@ -1840,7 +1840,7 @@ static void setup_per_zone_protection(vo
 		 * For each of the different allocation types:
 		 * GFP_DMA -> GFP_KERNEL -> GFP_HIGHMEM
 		 */
-		for (i = 0; i < MAX_NR_ZONES; i++) {
+		for (i = 0; i < GFP_ZONETYPES; i++) {
 			/*
 			 * For each of the zones:
 			 * ZONE_HIGHMEM -> ZONE_NORMAL -> ZONE_DMA
_