From: Mingming Cao <cmm@us.ibm.com>

ext3_rsv_mount.patch: Adds features on top of the

ext3_rsv_base.patch: 

- deal with earlier bogus -ENOSPC error

- do block reservation only for regular file 

- make the ext3 reservation feature as a mount option: new mount option
  added: reservation

- A pair of file ioctl commands are added for application to control the
  block reservation window size.


---

 25-akpm/fs/ext3/balloc.c          |   61 ++++++++++++++++++++++++++++----------
 25-akpm/fs/ext3/ialloc.c          |    2 -
 25-akpm/fs/ext3/inode.c           |    2 -
 25-akpm/fs/ext3/ioctl.c           |   20 ++++++++++++
 25-akpm/fs/ext3/super.c           |   20 +++++++++++-
 25-akpm/include/linux/ext3_fs.h   |   42 ++++++++++++++------------
 25-akpm/include/linux/ext3_fs_i.h |    2 -
 7 files changed, 111 insertions(+), 38 deletions(-)

diff -puN fs/ext3/balloc.c~ext3_rsv_mount fs/ext3/balloc.c
--- 25/fs/ext3/balloc.c~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/fs/ext3/balloc.c	Wed Apr 14 17:05:45 2004
@@ -714,7 +714,7 @@ static int alloc_new_reservation(struct 
 	else
 		start_block = goal + group_first_block;
 
-	size = my_rsv->rsv_goal_size;
+	size = atomic_read(&my_rsv->rsv_goal_size);
 	/* if we have a old reservation, discard it first */
 	if (!rsv_is_empty(my_rsv)) {
 		/*
@@ -862,7 +862,16 @@ ext3_try_to_allocate_with_rsv(struct sup
 		return -1;
 	}
 
-#ifdef EXT3_RESERVATION
+	/*
+	 * we don't deal with reservation when
+	 * filesystem is mounted without reservation
+	 * or the file is not a regular file
+	 * of last attemp of allocating a block with reservation turn on failed
+	 */
+	if (my_rsv == NULL ) {
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
+		goto out;
+	}
 	rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 	/*
 	 * goal is a group relative block number (if there is a goal)
@@ -906,10 +915,7 @@ ext3_try_to_allocate_with_rsv(struct sup
 		if (ret >= 0)
 			break;				/* succeed */
 	}
-#else
-	ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
-#endif
-
+out:
 	if (ret >= 0) {
 		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
 					"bitmap block");
@@ -937,12 +943,13 @@ ext3_try_to_allocate_with_rsv(struct sup
 int ext3_new_block(handle_t *handle, struct inode *inode,
 			unsigned long goal, int *errp)
 {
-	struct buffer_head *bitmap_bh = NULL;	/* bh */
-	struct buffer_head *gdp_bh;		/* bh2 */
-	int group_no;				/* i */
-	int ret_block;				/* j */
-	int bgi;				/* blockgroup iteration index */
-	int target_block;			/* tmp */
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *gdp_bh;
+	int group_no;
+	int goal_group;
+	int ret_block;
+	int bgi;			/* blockgroup iteration index */
+	int target_block;
 	int fatal = 0, err;
 	int performed_allocation = 0;
 	int free_blocks, root_blocks;
@@ -950,7 +957,7 @@ int ext3_new_block(handle_t *handle, str
 	struct ext3_group_desc *gdp;
 	struct ext3_super_block *es;
 	struct ext3_sb_info *sbi;
-	struct reserve_window *my_rsv = &EXT3_I(inode)->i_rsv_window;
+	struct reserve_window *my_rsv = NULL;
 #ifdef EXT3FS_DEBUG
 	static int goal_hits, goal_attempts;
 #endif
@@ -972,7 +979,10 @@ int ext3_new_block(handle_t *handle, str
 	sbi = EXT3_SB(sb);
 	es = EXT3_SB(sb)->s_es;
 	ext3_debug("goal=%lu.\n", goal);
-
+#ifdef EXT3_RESERVATION
+	if (test_opt(sb, RESERVATION) && S_ISREG(inode->i_mode))
+		my_rsv = &EXT3_I(inode)->i_rsv_window;
+#endif
 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
 	root_blocks = le32_to_cpu(es->s_r_blocks_count);
 	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
@@ -994,6 +1004,8 @@ int ext3_new_block(handle_t *handle, str
 	if (!gdp)
 		goto io_error;
 
+	goal_group = group_no;
+retry:
 	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 	if (free_blocks > 0) {
 		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
@@ -1037,7 +1049,26 @@ int ext3_new_block(handle_t *handle, str
 		if (ret_block >= 0) 
 			goto allocated;
 	}
-
+#ifdef EXT3_RESERVATION
+	/*
+	 * We may end up a bogus ealier ENOSPC error due to
+	 * filesystem is "full" of reservations, but
+	 * there maybe indeed free blocks avaliable on disk
+	 * In this case, we just forget about the reservations
+	 * just do block allocation as without reservations.
+	 */
+	if (my_rsv) {
+#ifdef EXT3_RESERVATION_DEBUG
+		printk("filesystem is fully reserved. Actual free blocks: %d. "
+			"Try to do allocation without reservation, goal_group "
+			"is %d\n",
+			free_blocks, goal_group);
+#endif
+		my_rsv = NULL;
+		group_no = goal_group;
+		goto retry;
+	}
+#endif
 	/* No space left on the device */
 	*errp = -ENOSPC;
 	goto out;
diff -puN fs/ext3/ialloc.c~ext3_rsv_mount fs/ext3/ialloc.c
--- 25/fs/ext3/ialloc.c~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/fs/ext3/ialloc.c	Wed Apr 14 17:05:45 2004
@@ -583,7 +583,7 @@ got:
 	ei->i_dtime = 0;
 	ei->i_rsv_window.rsv_start = 0;
 	ei->i_rsv_window.rsv_end= 0;
-	ei->i_rsv_window.rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+	atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS);
 	INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list);
 	ei->i_block_group = group;
 
diff -puN fs/ext3/inode.c~ext3_rsv_mount fs/ext3/inode.c
--- 25/fs/ext3/inode.c~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/fs/ext3/inode.c	Wed Apr 14 17:05:45 2004
@@ -2450,7 +2450,7 @@ void ext3_read_inode(struct inode * inod
 	ei->i_block_group = iloc.block_group;
 	ei->i_rsv_window.rsv_start = 0;
 	ei->i_rsv_window.rsv_end= 0;
-	ei->i_rsv_window.rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+	atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS);
 	INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list);
 	/*
 	 * NOTE! The in-memory inode i_data array is in little-endian order
diff -puN fs/ext3/ioctl.c~ext3_rsv_mount fs/ext3/ioctl.c
--- 25/fs/ext3/ioctl.c~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/fs/ext3/ioctl.c	Wed Apr 14 17:05:45 2004
@@ -20,6 +20,7 @@ int ext3_ioctl (struct inode * inode, st
 {
 	struct ext3_inode_info *ei = EXT3_I(inode);
 	unsigned int flags;
+	unsigned short rsv_window_size;
 
 	ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
 
@@ -151,6 +152,25 @@ flags_err:
 			return ret;
 		}
 #endif
+#ifdef EXT3_RESERVATION
+	case EXT3_IOC_GETRSVSZ:
+		rsv_window_size = atomic_read(&ei->i_rsv_window.rsv_goal_size);
+		return put_user(rsv_window_size, (int *)arg);
+	case EXT3_IOC_SETRSVSZ:
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EACCES;
+
+		if (get_user(rsv_window_size, (int *)arg))
+			return -EFAULT;
+
+		if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
+			rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
+		atomic_set(&ei->i_rsv_window.rsv_goal_size, rsv_window_size);
+		return 0;
+#endif
 	default:
 		return -ENOTTY;
 	}
diff -puN fs/ext3/super.c~ext3_rsv_mount fs/ext3/super.c
--- 25/fs/ext3/super.c~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/fs/ext3/super.c	Wed Apr 14 17:05:45 2004
@@ -572,7 +572,8 @@ enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
-	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
+	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+	Opt_reservation, Opt_noreservation, Opt_noload,
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -604,6 +605,8 @@ static match_table_t tokens = {
 	{Opt_nouser_xattr, "nouser_xattr"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
+	{Opt_reservation, "reservation"},
+	{Opt_noreservation, "noreservation"},
 	{Opt_noload, "noload"},
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
@@ -757,6 +760,19 @@ static int parse_options (char * options
 			printk("EXT3 (no)acl options not supported\n");
 			break;
 #endif
+#ifdef EXT3_RESERVATION
+		case Opt_reservation:
+			set_opt(sbi->s_mount_opt, RESERVATION);
+			break;
+		case Opt_noreservation:
+			clear_opt(sbi->s_mount_opt, RESERVATION);
+			break;
+#else
+		case Opt_reservation:
+		case Opt_noreservation:
+			printk("EXT3 block reservation options not supported\n");
+			break;
+#endif
 		case Opt_journal_update:
 			/* @@@ FIXME */
 			/* Eventually we will want to be able to create
@@ -1456,7 +1472,7 @@ static int ext3_fill_super (struct super
 	INIT_LIST_HEAD(&sbi->s_rsv_window_head.rsv_list);
 	sbi->s_rsv_window_head.rsv_start = 0;
 	sbi->s_rsv_window_head.rsv_end = 0;
-	sbi->s_rsv_window_head.rsv_goal_size = 0;
+	atomic_set(&sbi->s_rsv_window_head.rsv_goal_size, 0);
 
 	/*
 	 * set up enough so that it can read an inode
diff -puN include/linux/ext3_fs.h~ext3_rsv_mount include/linux/ext3_fs.h
--- 25/include/linux/ext3_fs.h~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/include/linux/ext3_fs.h	Wed Apr 14 17:05:45 2004
@@ -37,6 +37,7 @@ struct statfs;
  */
 #define EXT3_RESERVATION
 #define EXT3_DEFAULT_RESERVE_BLOCKS     8
+#define EXT3_MAX_RESERVE_BLOCKS         1024
 /*
  * Always enable hashed directories
  */
@@ -207,6 +208,10 @@ struct ext3_group_desc
 #ifdef CONFIG_JBD_DEBUG
 #define EXT3_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
 #endif
+#ifdef EXT3_RESERVATION
+#define EXT3_IOC_GETRSVSZ		_IOR('r', 1, long)
+#define EXT3_IOC_SETRSVSZ		_IOW('r', 2, long)
+#endif
 
 /*
  * Structure of an inode on the disk
@@ -305,24 +310,25 @@ struct ext3_inode {
 /*
  * Mount flags
  */
-#define EXT3_MOUNT_CHECK		0x0001	/* Do mount-time checks */
-#define EXT3_MOUNT_OLDALLOC		0x0002  /* Don't use the new Orlov allocator */
-#define EXT3_MOUNT_GRPID		0x0004	/* Create files with directory's group */
-#define EXT3_MOUNT_DEBUG		0x0008	/* Some debugging messages */
-#define EXT3_MOUNT_ERRORS_CONT		0x0010	/* Continue on errors */
-#define EXT3_MOUNT_ERRORS_RO		0x0020	/* Remount fs ro on errors */
-#define EXT3_MOUNT_ERRORS_PANIC		0x0040	/* Panic on errors */
-#define EXT3_MOUNT_MINIX_DF		0x0080	/* Mimics the Minix statfs */
-#define EXT3_MOUNT_NOLOAD		0x0100	/* Don't use existing journal*/
-#define EXT3_MOUNT_ABORT		0x0200	/* Fatal error detected */
-#define EXT3_MOUNT_DATA_FLAGS		0x0C00	/* Mode for data writes: */
-  #define EXT3_MOUNT_JOURNAL_DATA	0x0400	/* Write data to journal */
-  #define EXT3_MOUNT_ORDERED_DATA	0x0800	/* Flush data before commit */
-  #define EXT3_MOUNT_WRITEBACK_DATA	0x0C00	/* No data ordering */
-#define EXT3_MOUNT_UPDATE_JOURNAL	0x1000	/* Update the journal format */
-#define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
-#define EXT3_MOUNT_XATTR_USER		0x4000	/* Extended user attributes */
-#define EXT3_MOUNT_POSIX_ACL		0x8000	/* POSIX Access Control Lists */
+#define EXT3_MOUNT_CHECK		0x00001	/* Do mount-time checks */
+#define EXT3_MOUNT_OLDALLOC		0x00002  /* Don't use the new Orlov allocator */
+#define EXT3_MOUNT_GRPID		0x00004	/* Create files with directory's group */
+#define EXT3_MOUNT_DEBUG		0x00008	/* Some debugging messages */
+#define EXT3_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
+#define EXT3_MOUNT_ERRORS_RO		0x00020	/* Remount fs ro on errors */
+#define EXT3_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
+#define EXT3_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
+#define EXT3_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
+#define EXT3_MOUNT_ABORT		0x00200	/* Fatal error detected */
+#define EXT3_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
+#define EXT3_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
+#define EXT3_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
+#define EXT3_MOUNT_WRITEBACK_DATA	0x00C00	/* No data ordering */
+#define EXT3_MOUNT_UPDATE_JOURNAL	0x01000	/* Update the journal format */
+#define EXT3_MOUNT_NO_UID32		0x02000  /* Disable 32-bit UIDs */
+#define EXT3_MOUNT_XATTR_USER		0x04000	/* Extended user attributes */
+#define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
+#define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
diff -puN include/linux/ext3_fs_i.h~ext3_rsv_mount include/linux/ext3_fs_i.h
--- 25/include/linux/ext3_fs_i.h~ext3_rsv_mount	Wed Apr 14 17:05:45 2004
+++ 25-akpm/include/linux/ext3_fs_i.h	Wed Apr 14 17:05:45 2004
@@ -22,7 +22,7 @@ struct reserve_window {
 	struct list_head 	rsv_list;
 	__u32			rsv_start;
 	__u32			rsv_end;
-	unsigned short		rsv_goal_size;
+	atomic_t		rsv_goal_size;
 };
 
 /*

_