# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.636   -> 1.639  
#	         fs/Makefile	1.36    -> 1.37   
#	        fs/Config.in	1.35    -> 1.36   
#	arch/i386/mm/hugetlbpage.c	1.2     -> 1.4    
#	               (new)	        -> 1.3     fs/hugetlbfs/inode.c
#	               (new)	        -> 1.2     fs/hugetlbfs/Makefile
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/28	wli@elm3b17.eng.beaverton.ibm.com	1.637
# inode.c:
#   Numerous fixes.
# Makefile, inode.c:
#   new file
# --------------------------------------------
# 02/09/28	wli@elm3b17.eng.beaverton.ibm.com	1.638
# hugetlbpage.c:
#   Add hugetlb_prefault_vma()
# --------------------------------------------
# 02/09/29	wli@elm3b17.eng.beaverton.ibm.com	1.639
# inode.c:
#   hugetlbfs fixes.
# Makefile:
#   hugetlbfs makefile update
# Config.in:
#   Add in a hugetlbfs option.
# Makefile:
#   Add in hugetlbfs.
# hugetlbpage.c:
#   Lots of prefaulter fixes.
# --------------------------------------------
#
diff --minimal -Nru a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
--- a/arch/i386/mm/hugetlbpage.c	Sun Sep 29 14:48:46 2002
+++ b/arch/i386/mm/hugetlbpage.c	Sun Sep 29 14:48:46 2002
@@ -17,7 +17,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-static struct vm_operations_struct hugetlb_vm_ops;
+struct vm_operations_struct hugetlb_vm_ops;
 struct list_head htlbpage_freelist;
 spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
 extern long htlbpagemem;
@@ -44,24 +44,22 @@
 static struct page *
 alloc_hugetlb_page(void)
 {
-	struct list_head *curr, *head;
+	int i;
 	struct page *page;
 
 	spin_lock(&htlbpage_lock);
-
-	head = &htlbpage_freelist;
-	curr = head->next;
-
-	if (curr == head) {
+	if (list_empty(&htlbpage_freelist)) {
 		spin_unlock(&htlbpage_lock);
 		return NULL;
 	}
-	page = list_entry(curr, struct page, list);
-	list_del(curr);
+
+	page = list_entry(htlbpage_freelist.next, struct page, list);
+	list_del(&page->list);
 	htlbpagemem--;
 	spin_unlock(&htlbpage_lock);
 	set_page_count(page, 1);
-	memset(page_address(page), 0, HPAGE_SIZE);
+	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
+		clear_highpage(&page[i]);
 	return page;
 }
 
@@ -459,6 +457,61 @@
 	 return retval;
 }
 
+int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret = 0;
+
+	printk("hugetlb_prefault()\n");
+
+	BUG_ON(vma->vm_start & ~HPAGE_MASK);
+	BUG_ON(vma->vm_end & ~HPAGE_MASK);
+
+	printk("pgoff=%lx, start=%lx, end=%lx\n",
+			vma->vm_pgoff,
+			vma->vm_start,
+			vma->vm_end);
+	printk("htlbpages needed = %lu\n",
+			(vma->vm_end - vma->vm_start)/HPAGE_SIZE);
+
+	spin_lock(&mm->page_table_lock);
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+		unsigned long idx;
+		pte_t *pte = huge_pte_alloc(mm, addr);
+		struct page *page;
+
+		if (!pte) {
+			ret = -ENOMEM;
+			BUG();
+			goto out;
+		}
+		if (!pte_none(*pte)) {
+			BUG();
+			continue;
+		}
+
+		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+		printk("idx=%lx, addr=%lx\n", idx, addr);
+		page = find_get_page(mapping, idx);
+		if (!page) {
+			page = alloc_hugetlb_page();
+			if (!page) {
+				ret = -ENOMEM;
+				BUG();
+				goto out;
+			}
+			add_to_page_cache(page, mapping, idx);
+		}
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+	}
+	printk("dropped out of loop with addr = %lx\n", addr);
+out:
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
 static int
 alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len,
 			    int prot, int flag)
@@ -540,6 +593,13 @@
 	return (int) htlbzone_pages;
 }
 
-static struct vm_operations_struct hugetlb_vm_ops = {
+static struct page * hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused)
+{
+	BUG();
+	return NULL;
+}
+
+struct vm_operations_struct hugetlb_vm_ops = {
 	.close	= zap_hugetlb_resources,
+	.nopage = hugetlb_nopage,
 };
diff --minimal -Nru a/fs/Config.in b/fs/Config.in
--- a/fs/Config.in	Sun Sep 29 14:48:46 2002
+++ b/fs/Config.in	Sun Sep 29 14:48:46 2002
@@ -56,6 +56,11 @@
 bool 'Virtual memory file system support (former shm fs)' CONFIG_TMPFS
 define_bool CONFIG_RAMFS y
 
+if [ "$CONFIG_HUGETLB_PAGE" = "y" ] ; then
+   bool 'HugeTLB file system support' CONFIG_HUGETLBFS
+   define_bool CONFIG_HUGETLBFS y
+fi
+
 tristate 'ISO 9660 CDROM file system support' CONFIG_ISO9660_FS
 dep_mbool '  Microsoft Joliet CDROM extensions' CONFIG_JOLIET $CONFIG_ISO9660_FS
 dep_mbool '  Transparent decompression extension' CONFIG_ZISOFS $CONFIG_ISO9660_FS
diff --minimal -Nru a/fs/Makefile b/fs/Makefile
--- a/fs/Makefile	Sun Sep 29 14:48:46 2002
+++ b/fs/Makefile	Sun Sep 29 14:48:46 2002
@@ -46,6 +46,7 @@
 obj-$(CONFIG_EXT2_FS)		+= ext2/
 obj-$(CONFIG_CRAMFS)		+= cramfs/
 obj-$(CONFIG_RAMFS)		+= ramfs/
+obj-$(CONFIG_HUGETLBFS)		+= hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
 obj-$(CONFIG_INTERMEZZO_FS)	+= intermezzo/
 obj-$(CONFIG_MINIX_FS)		+= minix/
diff --minimal -Nru a/fs/hugetlbfs/Makefile b/fs/hugetlbfs/Makefile
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/hugetlbfs/Makefile	Sun Sep 29 14:48:46 2002
@@ -0,0 +1,9 @@
+#
+# Makefile for the linux ramfs routines.
+#
+
+obj-$(CONFIG_HUGETLBFS) += hugetlbfs.o
+
+hugetlbfs-objs := inode.o
+
+include $(TOPDIR)/Rules.make
diff --minimal -Nru a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/fs/hugetlbfs/inode.c	Sun Sep 29 14:48:46 2002
@@ -0,0 +1,382 @@
+/*
+ * Resizable simple ram filesystem for Linux.
+ *
+ * Copyright (C) 2000 Linus Torvalds.
+ *               2000 Transmeta Corp.
+ *
+ * Usage limits added by David Gibson, Linuxcare Australia.
+ * This file is released under the GPL.
+ */
+
+/*
+ * NOTE! This filesystem is probably most useful
+ * not as a real filesystem, but as an example of
+ * how virtual filesystems can be written.
+ *
+ * It doesn't get much simpler than this. Consider
+ * that this file implements the full semantics of
+ * a POSIX-compliant read-write filesystem.
+ *
+ * Note in particular how the filesystem does not
+ * need to implement any data structures of its own
+ * to keep track of the virtual data: using the VFS
+ * caches is sufficient.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+
+#include <asm/uaccess.h>
+
+/* some random number */
+#define HUGETLBFS_MAGIC	0x958458f6
+
+static struct super_operations hugetlbfs_ops;
+static struct address_space_operations hugetlbfs_aops;
+struct file_operations hugetlbfs_file_operations;
+static struct inode_operations hugetlbfs_dir_inode_operations;
+
+static struct backing_dev_info hugetlbfs_backing_dev_info = {
+	.ra_pages	= 0,	/* No readahead */
+	.memory_backed	= 1,	/* Does not contribute to dirty memory */
+};
+
+static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct inode *inode =file->f_dentry->d_inode;
+	struct address_space *mapping = inode->i_mapping;
+	int ret;
+
+	printk("inode=%p\n", inode);
+
+	down(&inode->i_sem);
+
+	UPDATE_ATIME(inode);
+	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
+	vma->vm_ops = &hugetlb_vm_ops;
+	printk("hugetlbfs_file_mmap()\n");
+	ret = hugetlb_prefault(mapping, vma);
+
+	up(&inode->i_sem);
+
+	return ret;
+}
+
+/*
+ * Read a page. Again trivial. If it didn't already exist
+ * in the page cache, it is zero-filled.
+ */
+static int hugetlbfs_readpage(struct file *file, struct page * page)
+{
+	BUG();
+	if (!PageUptodate(page)) {
+		char *kaddr = kmap_atomic(page, KM_USER0);
+
+		memset(kaddr, 0, PAGE_CACHE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	unlock_page(page);
+	return 0;
+}
+
+static int hugetlbfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	BUG();
+	if (!PageUptodate(page)) {
+		char *kaddr = kmap_atomic(page, KM_USER0);
+
+		memset(kaddr, 0, PAGE_CACHE_SIZE);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+		SetPageUptodate(page);
+	}
+	set_page_dirty(page);
+	return 0;
+}
+
+static int hugetlbfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	BUG();
+
+	if (pos > inode->i_size)
+		inode->i_size = pos;
+	return 0;
+}
+
+struct inode *hugetlbfs_get_inode(struct super_block *sb, int mode, int dev)
+{
+	struct inode * inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blksize = PAGE_CACHE_SIZE;
+		inode->i_blocks = 0;
+		inode->i_rdev = NODEV;
+		inode->i_mapping->a_ops = &hugetlbfs_aops;
+		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		switch (mode & S_IFMT) {
+		default:
+			init_special_inode(inode, mode, dev);
+			break;
+		case S_IFREG:
+			inode->i_fop = &hugetlbfs_file_operations;
+			break;
+		case S_IFDIR:
+			inode->i_op = &hugetlbfs_dir_inode_operations;
+			inode->i_fop = &simple_dir_operations;
+
+			/* directory inodes start off with i_nlink == 2 (for "." entry) */
+			inode->i_nlink++;
+			break;
+		case S_IFLNK:
+			inode->i_op = &page_symlink_inode_operations;
+			break;
+		}
+	}
+	return inode;
+}
+
+/*
+ * File creation. Allocate an inode, and we're done..
+ */
+/* SMP-safe */
+static int hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
+{
+	struct inode * inode = hugetlbfs_get_inode(dir->i_sb, mode, dev);
+	int error = -ENOSPC;
+
+	if (inode) {
+		d_instantiate(dentry, inode);
+		dget(dentry);		/* Extra count - pin the dentry in core */
+		error = 0;
+	}
+	return error;
+}
+
+static int hugetlbfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+	if (!retval)
+		dir->i_nlink++;
+	return retval;
+}
+
+static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode)
+{
+	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
+}
+
+/*
+ * Link a file..
+ */
+static int hugetlbfs_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+
+	inode->i_nlink++;
+	atomic_inc(&inode->i_count);	/* New dentry reference */
+	dget(dentry);		/* Extra pinning count for the created dentry */
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+static inline int hugetlbfs_positive(struct dentry *dentry)
+{
+	return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/*
+ * Check that a directory is empty (this works
+ * for regular files too, they'll just always be
+ * considered empty..).
+ *
+ * Note that an empty directory can still have
+ * children, they just all have to be negative..
+ */
+static int hugetlbfs_empty(struct dentry *dentry)
+{
+	struct list_head *list;
+
+	spin_lock(&dcache_lock);
+	list = dentry->d_subdirs.next;
+
+	while (list != &dentry->d_subdirs) {
+		struct dentry *de = list_entry(list, struct dentry, d_child);
+
+		if (hugetlbfs_positive(de)) {
+			spin_unlock(&dcache_lock);
+			return 0;
+		}
+		list = list->next;
+	}
+	spin_unlock(&dcache_lock);
+	return 1;
+}
+
+/*
+ * Unlink a hugetlbfs entry
+ */
+static int hugetlbfs_unlink(struct inode * dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	inode->i_nlink--;
+	dput(dentry);			/* Undo the count from "create" - this does all the work */
+	return 0;
+}
+
+static int hugetlbfs_rmdir(struct inode * dir, struct dentry *dentry)
+{
+	int retval = -ENOTEMPTY;
+
+	if (hugetlbfs_empty(dentry)) {
+		dentry->d_inode->i_nlink--;
+		hugetlbfs_unlink(dir, dentry);
+		dir->i_nlink--;
+		retval = 0;
+	}
+	return retval;
+}
+
+/*
+ * The VFS layer already does all the dentry stuff for rename,
+ * we just have to decrement the usage count for the target if
+ * it exists so that the VFS layer correctly free's it when it
+ * gets overwritten.
+ */
+static int hugetlbfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
+{
+	int error = -ENOTEMPTY;
+
+	if (hugetlbfs_empty(new_dentry)) {
+		struct inode *inode = new_dentry->d_inode;
+		if (inode) {
+			inode->i_nlink--;
+			dput(new_dentry);
+		}
+		if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+			old_dir->i_nlink--;
+			new_dir->i_nlink++;
+		}
+		error = 0;
+	}
+	return error;
+}
+
+static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+{
+	struct inode *inode;
+	int error = -ENOSPC;
+
+	inode = hugetlbfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+	if (inode) {
+		int l = strlen(symname)+1;
+		error = page_symlink(inode, symname, l);
+		if (!error) {
+			d_instantiate(dentry, inode);
+			dget(dentry);
+		} else
+			iput(inode);
+	}
+	return error;
+}
+
+static int hugetlbfs_sync_file(struct file * file, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+static struct address_space_operations hugetlbfs_aops = {
+	readpage:	hugetlbfs_readpage,
+	writepage:	fail_writepage,
+	prepare_write:	hugetlbfs_prepare_write,
+	commit_write:	hugetlbfs_commit_write
+};
+
+struct file_operations hugetlbfs_file_operations = {
+	read:		generic_file_read,
+	write:		generic_file_write,
+	mmap:		hugetlbfs_file_mmap,
+	fsync:		hugetlbfs_sync_file,
+	sendfile:	generic_file_sendfile,
+};
+
+static struct inode_operations hugetlbfs_dir_inode_operations = {
+	create:		hugetlbfs_create,
+	lookup:		simple_lookup,
+	link:		hugetlbfs_link,
+	unlink:		hugetlbfs_unlink,
+	symlink:	hugetlbfs_symlink,
+	mkdir:		hugetlbfs_mkdir,
+	rmdir:		hugetlbfs_rmdir,
+	mknod:		hugetlbfs_mknod,
+	rename:		hugetlbfs_rename,
+};
+
+static struct super_operations hugetlbfs_ops = {
+	statfs:		simple_statfs,
+	drop_inode:	generic_delete_inode,
+};
+
+static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
+{
+	struct inode * inode;
+	struct dentry * root;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = HUGETLBFS_MAGIC;
+	sb->s_op = &hugetlbfs_ops;
+	inode = hugetlbfs_get_inode(sb, S_IFDIR | 0755, 0);
+	if (!inode)
+		return -ENOMEM;
+
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = root;
+	return 0;
+}
+
+static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *data)
+{
+	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super);
+}
+
+static struct file_system_type hugetlbfs_fs_type = {
+	name:		"hugetlbfs",
+	get_sb:		hugetlbfs_get_sb,
+	kill_sb:	kill_litter_super,
+};
+
+static int __init init_hugetlbfs_fs(void)
+{
+	return register_filesystem(&hugetlbfs_fs_type);
+}
+
+static void __exit exit_hugetlbfs_fs(void)
+{
+	unregister_filesystem(&hugetlbfs_fs_type);
+}
+
+module_init(init_hugetlbfs_fs)
+module_exit(exit_hugetlbfs_fs)
+
+MODULE_LICENSE("GPL");