From: Andrea Arcangeli <andrea@suse.de>

I don't think we need install_swap_bdev/remove_swap_bdev anymore: we should
use swap_info->bdev, not swap_bdevs[].  The swap_info already has a ->bdev
field.  The only point of remove_swap_bdev/install_swap_bdev was to unplug
all devices as efficiently as possible, and we don't need that anymore now
that the unplug function takes a page parameter.

Plus the semaphore should be a rwsem to allow parallel unplug from multiple
pages.

After that I don't need to take the semaphore during swapon anymore: no
swapcache page with swp_type() pointing to such a bdev will be allowed until
swapon is complete (SWP_ACTIVE is set long after p->bdev is set).

In swapoff I only need a dummy serialization with the readers, after
try_to_unuse is complete:

 	err = try_to_unuse(type);
 	current->flags &= ~PF_SWAPOFF;

 	/* wait for any unplug function to finish */
 	down_write(&swap_unplug_sem);
 	up_write(&swap_unplug_sem);


That's all: no other locking and no install_swap_bdev/remove_swap_bdev.

(and the swap_bdevs[] compression code was busted)


---

 25-akpm/mm/swapfile.c       |   74 ++++++++++++++------------------------------
 drivers/block/ll_rw_blk.c   |    0 
 fs/buffer.c                 |    0 
 include/linux/backing-dev.h |    0 
 include/linux/blkdev.h      |    0 
 include/linux/swap.h        |    0 
 mm/readahead.c              |    0 
 7 files changed, 25 insertions(+), 49 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~swap-speedups-and-fix drivers/block/ll_rw_blk.c
diff -puN fs/buffer.c~swap-speedups-and-fix fs/buffer.c
diff -puN include/linux/backing-dev.h~swap-speedups-and-fix include/linux/backing-dev.h
diff -puN include/linux/blkdev.h~swap-speedups-and-fix include/linux/blkdev.h
diff -puN include/linux/swap.h~swap-speedups-and-fix include/linux/swap.h
diff -puN mm/readahead.c~swap-speedups-and-fix mm/readahead.c
diff -puN mm/swapfile.c~swap-speedups-and-fix mm/swapfile.c
--- 25/mm/swapfile.c~swap-speedups-and-fix	2004-05-12 21:01:25.758751368 -0700
+++ 25-akpm/mm/swapfile.c	2004-05-12 21:01:25.762750760 -0700
@@ -48,61 +48,38 @@ struct swap_info_struct swap_info[MAX_SW
 static DECLARE_MUTEX(swapon_sem);
 
 /*
- * Array of backing blockdevs, for swap_unplug_fn.  We need this because the
- * bdev->unplug_fn can sleep and we cannot hold swap_list_lock while calling
- * the unplug_fn.  And swap_list_lock cannot be turned into a semaphore.
+ * We need this because the bdev->unplug_fn can sleep and we cannot
+ * hold swap_list_lock while calling the unplug_fn. And swap_list_lock
+ * cannot be turned into a semaphore.
  */
-static DECLARE_MUTEX(swap_bdevs_sem);
-static struct block_device *swap_bdevs[MAX_SWAPFILES];
+static DECLARE_RWSEM(swap_unplug_sem);
 
 #define SWAPFILE_CLUSTER 256
 
-/*
- * Caller holds swap_bdevs_sem
- */
-static void install_swap_bdev(struct block_device *bdev)
-{
-	int i;
-
-	for (i = 0; i < MAX_SWAPFILES; i++) {
-		if (swap_bdevs[i] == NULL) {
-			swap_bdevs[i] = bdev;
-			return;
-		}
-	}
-	BUG();
-}
-
-static void remove_swap_bdev(struct block_device *bdev)
-{
-	int i;
-
-	for (i = 0; i < MAX_SWAPFILES; i++) {
-		if (swap_bdevs[i] == bdev) {
-			memcpy(&swap_bdevs[i], &swap_bdevs[i + 1],
-				(MAX_SWAPFILES - i - 1) * sizeof(*swap_bdevs));
-			swap_bdevs[MAX_SWAPFILES - 1] = NULL;
-			return;
-		}
-	}
-	BUG();
-}
-
 void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 {
-	int i;
+	swp_entry_t entry;
 
-	down(&swap_bdevs_sem);
-	for (i = 0; i < MAX_SWAPFILES; i++) {
-		struct block_device *bdev = swap_bdevs[i];
+	down_read(&swap_unplug_sem);
+	entry.val = page->private;
+	if (PageSwapCache(page)) {
+		struct block_device *bdev = swap_info[swp_type(entry)].bdev;
 		struct backing_dev_info *bdi;
 
-		if (bdev == NULL)
-			break;
+		/*
+		 * If the page is removed from swapcache from under us (with a
+		 * racy try_to_unuse/swapoff) we need an additional reference
+		 * count to avoid reading garbage from page->private above. If
+		 * the WARN_ON triggers during a swapoff it may be the race
+		 * condition and it's harmless. However if it triggers without
+		 * swapoff it signals a problem.
+		 */
+		WARN_ON(page_count(page) <= 1);
+
 		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
 		bdi->unplug_io_fn(bdi, page);
 	}
-	up(&swap_bdevs_sem);
+	up_read(&swap_unplug_sem);
 }
 
 static inline int scan_swap_map(struct swap_info_struct *si)
@@ -1136,6 +1113,11 @@ asmlinkage long sys_swapoff(const char _
 	current->flags |= PF_SWAPOFF;
 	err = try_to_unuse(type);
 	current->flags &= ~PF_SWAPOFF;
+
+	/* wait for any unplug function to finish */
+	down_write(&swap_unplug_sem);
+	up_write(&swap_unplug_sem);
+
 	if (err) {
 		/* re-insert swap space back into swap_list */
 		swap_list_lock();
@@ -1154,7 +1136,6 @@ asmlinkage long sys_swapoff(const char _
 		goto out_dput;
 	}
 	down(&swapon_sem);
-	down(&swap_bdevs_sem);
 	swap_list_lock();
 	swap_device_lock(p);
 	swap_file = p->swap_file;
@@ -1166,8 +1147,6 @@ asmlinkage long sys_swapoff(const char _
 	destroy_swap_extents(p);
 	swap_device_unlock(p);
 	swap_list_unlock();
-	remove_swap_bdev(p->bdev);
-	up(&swap_bdevs_sem);
 	up(&swapon_sem);
 	vfree(swap_map);
 	if (S_ISBLK(mapping->host->i_mode)) {
@@ -1511,7 +1490,6 @@ asmlinkage long sys_swapon(const char __
 		goto bad_swap;
 
 	down(&swapon_sem);
-	down(&swap_bdevs_sem);
 	swap_list_lock();
 	swap_device_lock(p);
 	p->flags = SWP_ACTIVE;
@@ -1537,8 +1515,6 @@ asmlinkage long sys_swapon(const char __
 	}
 	swap_device_unlock(p);
 	swap_list_unlock();
-	install_swap_bdev(p->bdev);
-	up(&swap_bdevs_sem);
 	up(&swapon_sem);
 	error = 0;
 	goto out;

_