From: Nick Piggin <piggin@cyberone.com.au>

The following patch gets request batching working as it should.

After a process is woken up, it is allowed to allocate up to 32 requests
for 20ms.  It does not prevent other processes from submitting requests while
it isn't submitting, though.  This should allow fewer context switches, and
allow batches of requests from each process to be sent to the io scheduler
instead of one request from each process.
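
In short (condensed from the hunks below, with locking and error paths
elided): a process that has just been woken after waiting for a free request
is marked as a batcher, and while its 20ms window is open and its 32-request
quota lasts, a full queue only refuses non-batchers:

static inline int ioc_batching(struct io_context *ioc)
{
	if (!ioc)
		return 0;
	/* batching while quota remains and the BLK_BATCH_TIME window is open */
	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
		(ioc->nr_batch_requests > 0 &&
		 time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/* in get_request(): a full queue turns away only the non-batchers */
if (blk_queue_full(q, rw) && !ioc_batching(ioc) && !elv_may_queue(q, rw))
	goto out;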

tiobench sequential writes are more than tripled, and random writes are nearly
doubled over mm1.  In earlier tests I generally saw better CPU efficiency,
but it doesn't show here.  There is still debug code to be taken out.  It's
also only on UP.

                                Avg     Maximum     Lat%   Lat%   CPU
 Identifier    Rate  (CPU%)  Latency   Latency     >2s    >10s   Eff

 Sequential Reads
 ------------------- ------ --------- ---------- ------- ------ ----
 -2.5.71-mm1   11.13 3.783%    46.10    24668.01   0.84   0.02   294
 +2.5.71-mm1   13.21 4.489%    37.37     5691.66   0.76   0.00   294

 Random Reads
 ------------------- ------ --------- ---------- ------- ------ ----
 -2.5.71-mm1    0.97 0.582%   519.86     6444.66  11.93   0.00   167
 +2.5.71-mm1    1.01 0.604%   484.59     6604.93  10.73   0.00   167

 Sequential Writes
 ------------------- ------ --------- ---------- ------- ------ ----
 -2.5.71-mm1    4.85 4.456%    77.80    99359.39   0.18   0.13   109
 +2.5.71-mm1   14.11 14.19%    10.07    22805.47   0.09   0.04    99

 Random Writes
 ------------------- ------ --------- ---------- ------- ------ ----
 -2.5.71-mm1    0.46 0.371%    14.48     6173.90   0.23   0.00   125
 +2.5.71-mm1    0.86 0.744%    24.08     8753.66   0.31   0.00   115



 drivers/block/ll_rw_blk.c |   91 ++++++++++++++++++++++++++++++++++------------
 include/linux/blkdev.h    |    6 +++
 2 files changed, 74 insertions(+), 23 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~blk-request-batching drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~blk-request-batching	2003-06-26 17:57:33.000000000 -0700
+++ 25-akpm/drivers/block/ll_rw_blk.c	2003-06-26 17:57:33.000000000 -0700
@@ -51,10 +51,11 @@ static struct workqueue_struct *kblockd_
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
-static inline int batch_requests(struct request_queue *q)
-{
-	return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
-}
+/* Amount of time in which a process may batch requests */
+#define BLK_BATCH_TIME	(HZ/50UL)
+
+/* Number of requests a "batching" process may submit */
+#define BLK_BATCH_REQ	32
 
 /*
  * Return the threshold (number of used requests) at which the queue is
@@ -1305,24 +1306,56 @@ static inline struct request *blk_alloc_
 	return NULL;
 }
 
+/*
+ * ioc_batching returns true if the ioc is a valid batching request and
+ * should be given priority access to a request.
+ */
+static inline int ioc_batching(struct io_context *ioc)
+{
+	if (!ioc)
+		return 0;
+
+	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
+		(ioc->nr_batch_requests > 0
+		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
+}
+
+/*
+ * ioc_set_batching sets ioc to be a new "batcher" if it is not one
+ */
+void ioc_set_batching(struct io_context *ioc)
+{
+	if (!ioc || ioc_batching(ioc))
+		return;
+
+	ioc->nr_batch_requests = BLK_BATCH_REQ;
+	ioc->last_waited = jiffies;
+}
+
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *
-get_request(request_queue_t *q, int rw, int gfp_mask, int force)
+static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct io_context *ioc = get_io_context();
 
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] == q->nr_requests)
-		blk_set_queue_full(q, rw);
+	if (rl->count[rw]+1 >= q->nr_requests) {
+		if (!blk_queue_full(q, rw)) {
+			ioc_set_batching(ioc);
+			blk_set_queue_full(q, rw);
+		}
+	}
 
-	if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) {
+	if (blk_queue_full(q, rw)
+			&& !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
+
 	rl->count[rw]++;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
@@ -1335,10 +1368,11 @@ get_request(request_queue_t *q, int rw, 
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
                         clear_queue_congested(q, rw);
 
-		if (rl->count[rw] <= batch_requests(q)) {
+		if (rl->count[rw]+1 <= q->nr_requests) {
+			smp_mb();
 			if (waitqueue_active(&rl->wait[rw]))
 				wake_up(&rl->wait[rw]);
-			else
+			if (!waitqueue_active(&rl->wait[rw]))
 				blk_clear_queue_full(q, rw);
 		}
 
@@ -1367,6 +1401,7 @@ get_request(request_queue_t *q, int rw, 
 	rq->sense = NULL;
 
 out:
+	put_io_context(ioc);
 	return rq;
 }
 
@@ -1378,7 +1413,6 @@ static struct request *get_request_wait(
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
-	int waited = 0;
 
 	generic_unplug_device(q);
 	do {
@@ -1387,11 +1421,15 @@ static struct request *get_request_wait(
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, GFP_NOIO, waited);
+		rq = get_request(q, rw, GFP_NOIO);
 
 		if (!rq) {
+			struct io_context *ioc;
+
 			io_schedule();
-			waited = 1;
+			ioc = get_io_context();
+			ioc_set_batching(ioc);
+			put_io_context(ioc);
 		}
 		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
@@ -1408,7 +1446,7 @@ struct request *blk_get_request(request_
 	if (gfp_mask & __GFP_WAIT)
 		rq = get_request_wait(q, rw);
 	else
-		rq = get_request(q, rw, gfp_mask, 0);
+		rq = get_request(q, rw, gfp_mask);
 
 	return rq;
 }
@@ -1560,10 +1598,11 @@ void __blk_put_request(request_queue_t *
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
 
-		if (rl->count[rw] <= batch_requests(q)) {
+		if (rl->count[rw]+1 <= q->nr_requests) {
+			smp_mb();
 			if (waitqueue_active(&rl->wait[rw]))
 				wake_up(&rl->wait[rw]);
-			else
+			if (!waitqueue_active(&rl->wait[rw]))
 				blk_clear_queue_full(q, rw);
 		}
 	}
@@ -1808,7 +1847,7 @@ get_rq:
 		freereq = NULL;
 	} else {
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) {
+		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
 			/*
 			 * READA bit set
 			 */
@@ -2378,6 +2417,7 @@ int __init blk_dev_init(void)
 	return 0;
 }
 
+static atomic_t nr_io_contexts = ATOMIC_INIT(0);
 
 /*
  * IO Context helper functions
@@ -2393,6 +2433,7 @@ void put_io_context(struct io_context *i
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
 		kfree(ioc);
+		atomic_dec(&nr_io_contexts);
 	}
 }
 
@@ -2409,7 +2450,8 @@ void exit_io_context(void)
 			ioc->aic->exit(ioc->aic);
 		put_io_context(ioc);
 		current->io_context = NULL;
-	}
+	} else
+		WARN_ON(1);
 	local_irq_restore(flags);
 }
 
@@ -2432,8 +2474,11 @@ struct io_context *get_io_context(void)
 	if (ret == NULL) {
 		ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 		if (ret) {
+			atomic_inc(&nr_io_contexts);
 			atomic_set(&ret->refcount, 1);
 			ret->pid = tsk->pid;
+			ret->last_waited = jiffies; /* doesn't matter... */
+			ret->nr_batch_requests = 0; /* because this is 0 */
 			ret->aic = NULL;
 			tsk->io_context = ret;
 		}
@@ -2515,16 +2560,16 @@ queue_requests_store(struct request_queu
 
 	if (rl->count[READ] >= q->nr_requests) {
 		blk_set_queue_full(q, READ);
-	} else if (rl->count[READ] <= batch_requests(q)) {
+	} else if (rl->count[READ]+1 <= q->nr_requests) {
 		blk_clear_queue_full(q, READ);
-		wake_up_all(&rl->wait[READ]);
+		wake_up(&rl->wait[READ]);
 	}
 
 	if (rl->count[WRITE] >= q->nr_requests) {
 		blk_set_queue_full(q, WRITE);
-	} else if (rl->count[WRITE] <= batch_requests(q)) {
+	} else if (rl->count[WRITE]+1 <= q->nr_requests) {
 		blk_clear_queue_full(q, WRITE);
-		wake_up_all(&rl->wait[WRITE]);
+		wake_up(&rl->wait[WRITE]);
 	}
 	return ret;
 }
diff -puN include/linux/blkdev.h~blk-request-batching include/linux/blkdev.h
--- 25/include/linux/blkdev.h~blk-request-batching	2003-06-26 17:57:33.000000000 -0700
+++ 25-akpm/include/linux/blkdev.h	2003-06-26 17:57:33.000000000 -0700
@@ -59,6 +59,12 @@ struct io_context {
 	atomic_t refcount;
 	pid_t pid;
 
+	/*
+	 * For request batching
+	 */
+	unsigned long last_waited; /* Time last woken after wait for request */
+	int nr_batch_requests;     /* Number of requests left in the batch */
+
 	struct as_io_context *aic;
 };
 

_