From: Ingo Molnar <mingo@elte.hu>

i've attached prefault-2.6.0-A0, which:

 - enables prefaulting/populating of private and shared (including
   shared-readonly) file mappings. This is the main mapping method
   of ld.so (a rough userspace sketch of such a mapping follows
   below). The cached startup time of larger GUI apps on a P4 box
   was reduced by 3-4%.
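
(not part of the patch, just a hypothetical userspace sketch of the kind
of mapping this targets: a private, read-only file mapping like the ones
ld.so sets up for library text. The library path and mapping size below
are made up.)

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical library path, purely for illustration */
	int fd = open("/lib/libc.so.6", O_RDONLY);
	void *text;

	if (fd < 0)
		return 1;
	/* private, read-only/exec file mapping: the ld.so common case */
	text = mmap(NULL, 1 << 20, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
	if (text == MAP_FAILED)
		return 1;
	/*
	 * the first access to each page takes a fault; with this patch
	 * the fault handler also populates up to PREFAULT_PAGES of the
	 * following pages if they are already in the pagecache.
	 */
	munmap(text, 1 << 20);
	close(fd);
	return 0;
}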

this patch is on top of remap-file-pages-prot-2.6.0-H1.

i've test-booted this patch on x86 SMP and UP; it works fine for a wide
range of workloads.



 25-akpm/mm/filemap.c |  111 +++++++++++++++++++++++++++++++++++++++++++++++++++
 25-akpm/mm/memory.c  |   18 ++++++++
 2 files changed, 129 insertions(+)

diff -puN mm/filemap.c~prefault-2.6.0-A0 mm/filemap.c
--- 25/mm/filemap.c~prefault-2.6.0-A0	Mon Jan  5 14:27:14 2004
+++ 25-akpm/mm/filemap.c	Mon Jan  5 14:27:14 2004
@@ -27,6 +27,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/security.h>
+#include <linux/rmap-locking.h>
 /*
  * This is needed for the following functions:
  *  - try_to_release_page
@@ -1296,6 +1297,113 @@ err:
 	return NULL;
 }
 
+#define NR_PAGES 8
+
+/*
+ * Use gang lookup in the nonblock case and open-code the whole
+ * prefaulting loop, which allows all sorts of shortcuts:
+ */
+static int filemap_populate_nonblock(struct vm_area_struct *vma,
+			unsigned long addr,
+			unsigned long len,
+			pgprot_t prot,
+			unsigned long pgoff)
+{
+	struct file *file = vma->vm_file;
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long size, nr_pages, left, range;
+	struct mm_struct *mm = vma->vm_mm;
+	struct page **tmp, *pages[NR_PAGES];
+	struct pte_chain *pte_chain = NULL;
+	pte_t *pte0 = NULL, *pte = NULL;
+	pgd_t *pgd;
+	pmd_t *pmd;
+
+	range = len >> PAGE_CACHE_SHIFT;
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (pgoff + range > size)
+		return -EINVAL;
+
+	spin_lock(&mm->page_table_lock);
+	spin_lock(&mapping->page_lock);
+
+	while (range) {
+		left = (PMD_SIZE - (addr & ~PMD_MASK));
+		left >>= PAGE_CACHE_SHIFT;
+		if (!(addr & ~PMD_MASK)) {
+			if (pte0)
+				pte_unmap(pte0);
+			pte0 = NULL;
+		}
+		if (!pte0) {
+			pgd = pgd_offset(mm, addr);
+			pmd = pmd_alloc(mm, pgd, addr);
+			if (!pmd)
+				goto err_unlock;
+			pte0 = pte = pte_alloc_map(mm, pmd, addr);
+			if (!pte)
+				goto err_unlock;
+		}
+
+		if (left > range)
+			left = range;
+		if (left > NR_PAGES)
+			left = NR_PAGES;
+
+		tmp = pages;
+		range -= left;
+
+		/*
+		 * Look up a block of pages that are already cached
+		 * and install them. Performance-wise we assume that
+		 * there are blocks of cached pages in this range; if
+		 * there are none, we simply skip over the holes.
+		 */
+		nr_pages = radix_tree_gang_lookup(&mapping->page_tree,
+						(void **)pages, pgoff, left);
+		while (left) {
+			left--;
+			if (nr_pages && ((*tmp)->index == pgoff) &&
+				    PageUptodate(*tmp) && (pte_none(*pte))) {
+
+				if (!pte_chain) {
+					pte_chain = pte_chain_alloc(GFP_ATOMIC);
+					if (unlikely(!pte_chain))
+						goto err_unlock;
+				}
+				page_cache_get(*tmp);
+
+				mm->rss++;
+				flush_icache_page(vma, *tmp);
+				set_pte(pte, mk_pte(*tmp, prot));
+				pte_chain = page_add_rmap(*tmp, pte, pte_chain);
+				update_mmu_cache(vma, addr, *pte);
+
+				tmp++;
+				nr_pages--;
+			}
+			pgoff++;
+			addr += PAGE_SIZE;
+			pte++;
+		}
+	}
+	if (pte0)
+		pte_unmap(pte0);
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&mm->page_table_lock);
+	pte_chain_free(pte_chain);
+	return 0;
+
+err_unlock:
+	if (pte0)
+		pte_unmap(pte0);
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&mm->page_table_lock);
+	pte_chain_free(pte_chain);
+	return -ENOMEM;
+}
+
 static int filemap_populate(struct vm_area_struct *vma,
 			unsigned long addr,
 			unsigned long len,
@@ -1312,6 +1418,9 @@ static int filemap_populate(struct vm_ar
 	struct page *page;
 	int err;
 
+	if (linear && nonblock)
+		return filemap_populate_nonblock(vma, addr, len, prot, pgoff);
+
 	if (!nonblock)
 		force_page_cache_readahead(mapping, vma->vm_file,
 					pgoff, len >> PAGE_CACHE_SHIFT);
diff -puN mm/memory.c~prefault-2.6.0-A0 mm/memory.c
--- 25/mm/memory.c~prefault-2.6.0-A0	Mon Jan  5 14:27:14 2004
+++ 25-akpm/mm/memory.c	Mon Jan  5 14:27:14 2004
@@ -1396,6 +1396,8 @@ out:
 	return ret;
 }
 
+#define PREFAULT_PAGES 7
+
 /*
  * do_no_page() tries to create a new page mapping. It aggressively
  * tries to share with existing pages, but makes a separate copy if
@@ -1439,6 +1441,22 @@ retry:
 	if (new_page == NOPAGE_OOM)
 		return VM_FAULT_OOM;
 
+	if (vma->vm_ops && vma->vm_ops->populate &&
+				!(vma->vm_flags & VM_NONLINEAR)) {
+
+		unsigned long start = (address & PAGE_MASK) + PAGE_SIZE,
+			end = start + PAGE_SIZE * PREFAULT_PAGES, size, pgoff;
+		int nr_pages;
+
+		pgoff = ((start - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+		if (vma->vm_end < end)
+			end = vma->vm_end;
+		nr_pages = (end - start) / PAGE_SIZE;
+		size = nr_pages * PAGE_SIZE;
+
+		if (nr_pages)
+			vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, pgoff, MAP_NONBLOCK);
+	}
 	ret = VM_FAULT_MAJOR;
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain)

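a quick way to sanity-check the prefaulting from userspace (hypothetical
sketch, not part of the patch): map an already-cached file privately,
touch one byte per page and compare the getrusage() fault counters. With
PREFAULT_PAGES at 7, every no-page fault on a pagecache-hot mapping can
instantiate up to 8 ptes at once, so the minor-fault count of such a walk
should drop to roughly 1/8th of what a vanilla kernel reports:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/bin/ls";
	long page = sysconf(_SC_PAGESIZE);
	struct rusage before, after;
	volatile char sum = 0;
	struct stat st;
	char *map;
	off_t off;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0 || !st.st_size)
		return 1;
	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	getrusage(RUSAGE_SELF, &before);
	for (off = 0; off < st.st_size; off += page)
		sum += map[off];	/* touch one byte per page */
	getrusage(RUSAGE_SELF, &after);

	printf("minor faults: %ld  major faults: %ld\n",
		after.ru_minflt - before.ru_minflt,
		after.ru_majflt - before.ru_majflt);
	munmap(map, st.st_size);
	close(fd);
	return 0;
}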