From: Brian Jackson on
On Friday, May 14, 2010 03:47:37 pm Stefan Hajnoczi wrote:
> This patch adds blk-iopoll interrupt mitigation to virtio-blk. Instead
> of processing completed requests inside the virtqueue interrupt handler,
> a softirq is scheduled to process up to a maximum number of completed
> requests in one go.
>
> If the number of completed requests reaches the maximum, then another
> softirq is scheduled to continue polling. Otherwise the virtqueue
> interrupt is re-enabled and we return to interrupt-driven mode.
>
> The patch sets the maximum number of completed requests (aka budget, aka
> weight) to 4. This is a low number but reflects the expensive context
> switch between guest and host virtio-blk emulation.
>
> The blk-iopoll infrastructure is enabled system-wide by default:
>
> kernel.blk_iopoll = 1
>
> It can be disabled to always use interrupt-driven mode (useful for
> comparison):
>
> kernel.blk_iopoll = 0
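
For reference, kernel.blk_iopoll corresponds to /proc/sys/kernel/blk_iopoll at
runtime. The driver-side pattern that blk-iopoll expects, and that the patch
below follows, looks roughly like the simplified sketch here; it is a generic
illustration rather than the virtio-blk code itself, and the helpers marked as
placeholders are made up for the example:

#include <linux/blk-iopoll.h>
#include <linux/interrupt.h>

/* Poll function: called from the blk-iopoll softirq with a budget. */
static int example_poll(struct blk_iopoll *iop, int budget)
{
	int done = reap_completions(budget);	/* placeholder helper */

	if (done < budget) {
		/* Drained within budget: leave polling mode and return
		 * to interrupt-driven completion. */
		blk_iopoll_complete(iop);
		unmask_device_irq();		/* placeholder helper */
	}

	/* Returning done == budget keeps the softirq polling. */
	return done;
}

/* Interrupt handler: defer completion processing to the softirq. */
static irqreturn_t example_irq(int irq, void *data)
{
	struct blk_iopoll *iop = data;

	/* blk_iopoll_sched_prep() returns 0 when we win the right to
	 * schedule this iopoll instance. */
	if (!blk_iopoll_sched_prep(iop)) {
		mask_device_irq();		/* placeholder helper */
		blk_iopoll_sched(iop);
	}
	return IRQ_HANDLED;
}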


Any preliminary numbers? Latency, throughput, CPU use? What about comparing
different "weights"?


>
> Signed-off-by: Stefan Hajnoczi <stefanha(a)linux.vnet.ibm.com>
> ---
> No performance figures yet.
>
> drivers/block/virtio_blk.c |   71 ++++++++++++++++++++++++++++++++++++++-----
> 1 files changed, 62 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 2138a7a..1523895 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -6,6 +6,7 @@
> #include <linux/virtio.h>
> #include <linux/virtio_blk.h>
> #include <linux/scatterlist.h>
> +#include <linux/blk-iopoll.h>
>
> #define PART_BITS 4
>
> @@ -26,6 +27,9 @@ struct virtio_blk
>
> mempool_t *pool;
>
> + /* Host->guest notify mitigation */
> + struct blk_iopoll iopoll;
> +
> /* What host tells us, plus 2 for header & tailer. */
> unsigned int sg_elems;
>
> @@ -42,16 +46,18 @@ struct virtblk_req
> u8 status;
> };
>
> -static void blk_done(struct virtqueue *vq)
> +/* Assumes vblk->lock held */
> +static int __virtblk_end_requests(struct virtio_blk *vblk, int weight)
> {
> - struct virtio_blk *vblk = vq->vdev->priv;
> struct virtblk_req *vbr;
> unsigned int len;
> - unsigned long flags;
> + int error;
> + int work = 0;
>
> - spin_lock_irqsave(&vblk->lock, flags);
> - while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
> - int error;
> + while (!weight || work < weight) {
> + vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len);
> + if (!vbr)
> + break;
>
> switch (vbr->status) {
> case VIRTIO_BLK_S_OK:
> @@ -74,10 +80,53 @@ static void blk_done(struct virtqueue *vq)
> __blk_end_request_all(vbr->req, error);
> list_del(&vbr->list);
> mempool_free(vbr, vblk->pool);
> + work++;
> }
> +
> /* In case queue is stopped waiting for more buffers. */
> blk_start_queue(vblk->disk->queue);
> + return work;
> +}
> +
> +static int virtblk_iopoll(struct blk_iopoll *iopoll, int weight)
> +{
> + struct virtio_blk *vblk =
> + container_of(iopoll, struct virtio_blk, iopoll);
> + unsigned long flags;
> + int work;
> +
> + spin_lock_irqsave(&vblk->lock, flags);
> +
> + work = __virtblk_end_requests(vblk, weight);
> + if (work < weight) {
> + /* Keep polling if there are pending requests. */
> + if (vblk->vq->vq_ops->enable_cb(vblk->vq))
> + __blk_iopoll_complete(&vblk->iopoll);
> + else
> + vblk->vq->vq_ops->disable_cb(vblk->vq);
> + }
> +
> spin_unlock_irqrestore(&vblk->lock, flags);
> + return work;
> +}
> +
> +static void blk_done(struct virtqueue *vq)
> +{
> + struct virtio_blk *vblk = vq->vdev->priv;
> + unsigned long flags;
> +
> + if (blk_iopoll_enabled) {
> + if (!blk_iopoll_sched_prep(&vblk->iopoll)) {
> + spin_lock_irqsave(&vblk->lock, flags);
> + vblk->vq->vq_ops->disable_cb(vblk->vq);
> + spin_unlock_irqrestore(&vblk->lock, flags);
> + blk_iopoll_sched(&vblk->iopoll);
> + }
> + } else {
> + spin_lock_irqsave(&vblk->lock, flags);
> + __virtblk_end_requests(vblk, 0);
> + spin_unlock_irqrestore(&vblk->lock, flags);
> + }
> }
>
> static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> @@ -289,11 +338,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
> goto out_free_vq;
> }
>
> + blk_iopoll_init(&vblk->iopoll, 4 /* budget */, virtblk_iopoll);
> + blk_iopoll_enable(&vblk->iopoll);
> +
> /* FIXME: How many partitions? How long is a piece of string? */
> vblk->disk = alloc_disk(1 << PART_BITS);
> if (!vblk->disk) {
> err = -ENOMEM;
> - goto out_mempool;
> + goto out_iopoll;
> }
>
> q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
> @@ -401,13 +453,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
> if (!err && opt_io_size)
> blk_queue_io_opt(q, blk_size * opt_io_size);
>
> -
> add_disk(vblk->disk);
> return 0;
>
> out_put_disk:
> put_disk(vblk->disk);
> -out_mempool:
> +out_iopoll:
> + blk_iopoll_disable(&vblk->iopoll);
> mempool_destroy(vblk->pool);
> out_free_vq:
> vdev->config->del_vqs(vdev);
> @@ -430,6 +482,7 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
> del_gendisk(vblk->disk);
> blk_cleanup_queue(vblk->disk->queue);
> put_disk(vblk->disk);
> + blk_iopoll_disable(&vblk->iopoll);
> mempool_destroy(vblk->pool);
> vdev->config->del_vqs(vdev);
> kfree(vblk);
From: Stefan Hajnoczi on
On Fri, May 14, 2010 at 05:30:56PM -0500, Brian Jackson wrote:
> Any preliminary numbers? latency, throughput, cpu use? What about comparing
> different "weights"?

I am running benchmarks and will report results when they are in.

Stefan
From: Jens Axboe on
On Tue, May 18 2010, Stefan Hajnoczi wrote:
> On Fri, May 14, 2010 at 05:30:56PM -0500, Brian Jackson wrote:
> > Any preliminary numbers? latency, throughput, cpu use? What about comparing
> > different "weights"?
>
> I am running benchmarks and will report results when they are in.

I'm very interested as well; I have been hoping for more adoption of
this and have mptsas and mpt2sas patches pending.

I have not done a thorough, exhaustive weight analysis myself, so note me
down as wanting such an analysis on virtio_blk as well.

--
Jens Axboe
