From: Neil Brown <neilb@cse.unsw.edu.au>

Fix bug #2661

RAID currently calls ->unplug_fn while holding device_lock (taken with
spin_lock_irqsave()), but unplug_fns can sleep.


After a morning of scratching my head and trying to come up with something
that does less locking, the following is the best I could manage.  I'm not
proud of it, but it should work.
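
In short, every unplug_slaves() below ends up with the same shape: pin the
rdev via nr_pending while device_lock is held, drop the lock, call the
(possibly sleeping) ->unplug_fn, then re-take the lock and drop the
reference.  Condensed, using the raid1 names:

	spin_lock_irqsave(&conf->device_lock, flags);
	for (i = 0; i < mddev->raid_disks; i++) {
		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
		if (rdev && atomic_read(&rdev->nr_pending)) {
			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);

			/* keep the rdev pinned while the lock is dropped */
			atomic_inc(&rdev->nr_pending);
			spin_unlock_irqrestore(&conf->device_lock, flags);

			if (r_queue && r_queue->unplug_fn)
				r_queue->unplug_fn(r_queue);	/* may sleep */

			spin_lock_irqsave(&conf->device_lock, flags);
			atomic_dec(&rdev->nr_pending);
		}
	}
	spin_unlock_irqrestore(&conf->device_lock, flags);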

If I moved "nr_pending" out of rdev and into the per-personality structures
(e.g. mirror_info), and if I had an "atomic_inc_if_nonzero", I could do
without so much locking, but adding random atomic_* functions doesn't seem
trivial.
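
Purely for illustration, such a helper (which does not exist in the tree)
might look like the sketch below; a cmpxchg()-based version only works on
architectures that provide cmpxchg(), which is exactly why it doesn't look
trivial to add:

/* Hypothetical: increment v unless it is already zero.
 * Returns non-zero if the increment happened. */
static inline int atomic_inc_if_nonzero(atomic_t *v)
{
	int old;

	do {
		old = atomic_read(v);
		if (old == 0)
			return 0;	/* device is idle, leave it alone */
	} while (cmpxchg(&v->counter, old, old + 1) != old);

	return 1;
}

unplug_slaves() could then pin a device with a single
"if (rdev && atomic_inc_if_nonzero(&rdev->nr_pending))" and skip the
unlock/relock dance around ->unplug_fn altogether.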


---

 25-akpm/drivers/md/multipath.c |    9 +++++++++
 25-akpm/drivers/md/raid1.c     |    8 +++++++-
 25-akpm/drivers/md/raid5.c     |   21 ++++++++++++++-------
 25-akpm/drivers/md/raid6main.c |   22 ++++++++++++++--------
 4 files changed, 44 insertions(+), 16 deletions(-)

diff -puN drivers/md/multipath.c~raid-locking-fix drivers/md/multipath.c
--- 25/drivers/md/multipath.c~raid-locking-fix	2004-05-10 19:32:17.734367632 -0700
+++ 25-akpm/drivers/md/multipath.c	2004-05-10 19:32:17.743366264 -0700
@@ -159,16 +159,25 @@ static void unplug_slaves(mddev_t *mddev
 {
 	multipath_conf_t *conf = mddev_to_conf(mddev);
 	int i;
+	unsigned long flags;
 
+	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
 		if (rdev && !rdev->faulty) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
+			atomic_inc(&rdev->nr_pending);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
+
+			spin_lock_irqsave(&conf->device_lock, flags);
+			atomic_dec(&rdev->nr_pending);
 		}
 	}
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 static void multipath_unplug(request_queue_t *q)
 {
diff -puN drivers/md/raid1.c~raid-locking-fix drivers/md/raid1.c
--- 25/drivers/md/raid1.c~raid-locking-fix	2004-05-10 19:32:17.735367480 -0700
+++ 25-akpm/drivers/md/raid1.c	2004-05-10 19:32:17.744366112 -0700
@@ -459,11 +459,17 @@ static void unplug_slaves(mddev_t *mddev
 	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-		if (rdev && !rdev->faulty) {
+		if (rdev && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
+			atomic_inc(&rdev->nr_pending);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
+
+			spin_lock_irqsave(&conf->device_lock, flags);
+			atomic_dec(&rdev->nr_pending);
 		}
 	}
 	spin_unlock_irqrestore(&conf->device_lock, flags);
diff -puN drivers/md/raid5.c~raid-locking-fix drivers/md/raid5.c
--- 25/drivers/md/raid5.c~raid-locking-fix	2004-05-10 19:32:17.737367176 -0700
+++ 25-akpm/drivers/md/raid5.c	2004-05-10 19:32:17.745365960 -0700
@@ -1301,18 +1301,25 @@ static void unplug_slaves(mddev_t *mddev
 {
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	int i;
+	unsigned long flags;
 
+	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
-		if (rdev && !rdev->faulty) {
-			struct block_device *bdev = rdev->bdev;
-			if (bdev) {
-				request_queue_t *r_queue = bdev_get_queue(bdev);
-				if (r_queue && r_queue->unplug_fn)
-					r_queue->unplug_fn(r_queue);
-			}
+		if (rdev && atomic_read(&rdev->nr_pending)) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			atomic_inc(&rdev->nr_pending);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+
+			if (r_queue && r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+
+			spin_lock_irqsave(&conf->device_lock, flags);
+			atomic_dec(&rdev->nr_pending);
 		}
 	}
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
 static void raid5_unplug_device(request_queue_t *q)
diff -puN drivers/md/raid6main.c~raid-locking-fix drivers/md/raid6main.c
--- 25/drivers/md/raid6main.c~raid-locking-fix	2004-05-10 19:32:17.739366872 -0700
+++ 25-akpm/drivers/md/raid6main.c	2004-05-10 19:32:17.747365656 -0700
@@ -1461,21 +1461,27 @@ static inline void raid6_activate_delaye
 
 static void unplug_slaves(mddev_t *mddev)
 {
-	/* note: this is always called with device_lock held */
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	int i;
+	unsigned long flags;
 
+	spin_lock_irqsave(&conf->device_lock, flags);
 	for (i=0; i<mddev->raid_disks; i++) {
 		mdk_rdev_t *rdev = conf->disks[i].rdev;
-		if (rdev && !rdev->faulty) {
-			struct block_device *bdev = rdev->bdev;
-			if (bdev) {
-				request_queue_t *r_queue = bdev_get_queue(bdev);
-				if (r_queue && r_queue->unplug_fn)
-					r_queue->unplug_fn(r_queue);
-			}
+		if (rdev && atomic_read(&rdev->nr_pending)) {
+			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+			atomic_inc(&rdev->nr_pending);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+
+			if (r_queue && r_queue->unplug_fn)
+				r_queue->unplug_fn(r_queue);
+
+			spin_lock_irqsave(&conf->device_lock, flags);
+			atomic_dec(&rdev->nr_pending);
 		}
 	}
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
 static void raid6_unplug_device(request_queue_t *q)

_