From: Michael S. Tsirkin on
On Thu, May 27, 2010 at 03:07:52PM +0300, Avi Kivity wrote:
> I missed the spec patch, can you repost it?

Still work in progress, but here it is.
Note I am still debating with myself whether we should split
avail idx and flags into separate cache lines.

diff --git a/virtio-spec.lyx b/virtio-spec.lyx
index ed35893..150e5a8 100644
--- a/virtio-spec.lyx
+++ b/virtio-spec.lyx
@@ -1803,6 +1803,36 @@ next
\emph default
descriptor entry (modulo the ring size).
This starts at 0, and increases.
+\change_inserted 0 1274966643
+
+\end_layout
+
+\begin_layout Standard
+
+\change_inserted 0 1274968378
+When PUBLISH_USED feature flag has
+\emph on
+not
+\emph default
+ been negotiated, the ring follows the
+\begin_inset Quotes eld
+\end_inset
+
+flags
+\begin_inset Quotes erd
+\end_inset
+
+ and the
+\begin_inset Quotes eld
+\end_inset
+
+idx
+\begin_inset Quotes erd
+\end_inset
+
+ fields:
+\change_unchanged
+
\end_layout

\begin_layout Standard
@@ -1845,7 +1875,134 @@ struct vring_avail {

\end_layout

+\begin_layout Standard
+
+\change_inserted 0 1274968432
+\begin_inset CommandInset label
+LatexCommand label
+name "PUBLISH_USED-feature"
+
+\end_inset
+
+When PUBLISH_USED feature flag has been negotiated, the control structure
+ including the
+\begin_inset Quotes eld
+\end_inset
+
+flags and the
+\begin_inset Quotes eld
+\end_inset
+
+idx
+\begin_inset Quotes erd
+\end_inset
+
+ fields follows the ring.
+ This leaves room for the
+\begin_inset Quotes eld
+\end_inset
+
+last_seen_used_idx
+\begin_inset Quotes erd
+\end_inset
+
+ field, which indicates the most recent
+\begin_inset Quotes eld
+\end_inset
+
+idx
+\begin_inset Quotes erd
+\end_inset
+
+ value observed by guest in the used ring (see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sub:Used-Ring"
+
+\end_inset
+
+ below):
+\end_layout
+
+\begin_layout Standard
+
+\change_inserted 0 1274967396
+\begin_inset listings
+inline false
+status open
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967404
+
+struct vring_avail {
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967405
+
+ u16 ring[qsz]; /* qsz is the Queue Size field read from device */
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967533
+
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967533
+
+ u16 flags;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967533
+
+ u16 idx;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274968345
+
+ u16 last_seen_used_idx;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274967396
+
+};
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\change_inserted 0 1274967715
+If the ring is large enough, the second layout maintains the control and
+ ring structures on separate cache lines.
+\end_layout
+
\begin_layout Subsection
+
+\change_inserted 0 1274968415
+\begin_inset CommandInset label
+LatexCommand label
+name "sub:Used-Ring"
+
+\end_inset
+
+
+\change_unchanged
Used Ring
\end_layout

@@ -2391,12 +2548,20 @@ status open

\begin_layout Plain Layout

-while (vq->last_seen_used != vring->used.idx) {
+while (vq->last_seen_used
+\change_inserted 0 1274968316
+_idx
+\change_unchanged
+ != vring->used.idx) {
\end_layout

\begin_layout Plain Layout

- struct vring_used_elem *e = vring.used->ring[vq->last_seen_used%vsz];
+ struct vring_used_elem *e = vring.used->ring[vq->last_seen_used
+\change_inserted 0 1274968326
+_idx
+\change_unchanged
+%vsz];
\end_layout

\begin_layout Plain Layout
@@ -2406,7 +2571,11 @@ while (vq->last_seen_used != vring->used.idx) {

\begin_layout Plain Layout

- vq->last_seen_used++;
+ vq->last_seen_used
+\change_inserted 0 1274968321
+_idx
+\change_unchanged
+++;
\end_layout

\begin_layout Plain Layout
@@ -2419,6 +2588,13 @@ while (vq->last_seen_used != vring->used.idx) {

\end_layout

+\begin_layout Standard
+
+\change_inserted 0 1274968252
+If PUBLISH_USED feature is negotiated, last_seen_used_idx value should be published
+ to the device in the avail ring.
+\end_layout
+
\begin_layout Subsection
Dealing With Configuration Changes
\end_layout
@@ -2986,6 +3162,47 @@ struct vring_avail {
\begin_layout Plain Layout

};
+\change_inserted 0 1274966477
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274966484
+
+struct vring_avail_ctrl {
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274966489
+
+ __u16 flags;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274966494
+
+ __u16 idx;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274966499
+
+ __u16 last_used_idx;
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 0 1274966474
+
+};
\end_layout

\begin_layout Plain Layout
@@ -3349,6 +3566,28 @@ reference "sub:Indirect-Descriptors"
\end_inset

.
+\change_inserted 0 1274967762
+
+\end_layout
+
+\begin_layout Description
+
+\change_inserted 0 1274967926
+VIRTIO_F_RING_PUBLISH_USED
+\begin_inset space ~
+\end_inset
+
+(29) Negotiating this feature indicates that the avail ring layout includes
+ the used index observed by driver, see
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "PUBLISH_USED-feature"
+
+\end_inset
+
+.
+\change_unchanged
+
\end_layout

\begin_layout Description
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Michael S. Tsirkin on
On Fri, May 28, 2010 at 11:56:54AM +0200, Jes Sorensen wrote:
> On 05/26/10 21:50, Michael S. Tsirkin wrote:
> > Here's a rewrite of the original patch with a new layout.
> > I haven't tested it yet so no idea how this performs, but
> > I think this addresses the cache bounce issue raised by Avi.
> > Posting for early flames/comments.
> >
> > Generally, the Host end of the virtio ring doesn't need to see where
> > Guest is up to in consuming the ring. However, to completely understand
> > what's going on from the outside, this information must be exposed.
> > For example, host can reduce the number of interrupts by detecting
> > that the guest is currently handling previous buffers.
> >
> > We add a feature bit so the guest can tell the host that it's writing
> > out the current value there, if it wants to use that.
> >
> > This differs from original approach in that the used index
> > is put after avail index (they are typically written out together).
> > To avoid cache bounces on descriptor access,
> > and make future extensions easier, we put the ring itself at start of
> > page, and move the control after it.
>
> Hi Michael,
>
> It looks pretty good to me, however one thing I have been thinking of
> while reading through it:
>
> Rather than storing a pointer within the ring struct, pointing into a
> position within the same struct. How about storing a byte offset instead
> and using a cast to get to the pointer position? That would avoid the
> pointer dereference, which is less effective cache wise and harder for
> the CPU to predict.
>
> Not sure whether it really matters performance wise, just a thought.
>
> Cheers,
> Jes

I think this won't work: when PUBLISH_USED_IDX is negotiated,
the pointer is to within the ring.

--
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Michael S. Tsirkin on
On Mon, May 31, 2010 at 05:16:42PM +0930, Rusty Russell wrote:
> On Thu, 27 May 2010 05:20:35 am Michael S. Tsirkin wrote:
> > Here's a rewrite of the original patch with a new layout.
> > I haven't tested it yet so no idea how this performs, but
> > I think this addresses the cache bounce issue raised by Avi.
> > Posting for early flames/comments.
>
> Sorry, not without some evidence that it'll actually reduce cacheline
> bouncing. I *think* it will, but it's not obvious: the host may keep
> looking at avail_idx as we're updating last_seen. Or does qemu always
> look at both together anyway?
> Can someone convince me this is a win?
> Rusty.

What really happens is host looks at flags and last_seen together.
And flags happens to be in the same cache line with avail idx.
So to get an obvious win, we should put flags and last_seen
in a separate cache line from avail, which is easy - just add some padding.

And I'll relax the requirement from guest to only require it to update
last_seen when interrupts are enabled. This way flags and
last_seen are written together and read together.

Makes sense?
--
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Michael S. Tsirkin on
On Mon, May 31, 2010 at 09:36:00AM +0200, Jes Sorensen wrote:
> On 05/30/10 13:22, Michael S. Tsirkin wrote:
> > On Fri, May 28, 2010 at 11:56:54AM +0200, Jes Sorensen wrote:
> >> It looks pretty good to me, however one thing I have been thinking of
> >> while reading through it:
> >>
> >> Rather than storing a pointer within the ring struct, pointing into a
> >> position within the same struct. How about storing a byte offset instead
> >> and using a cast to get to the pointer position? That would avoid the
> >> pointer dereference, which is less effective cache wise and harder for
> >> the CPU to predict.
> >>
> >> Not sure whether it really matters performance wise, just a thought.
> >
> > I think this won't work: when PUBLISH_USED_IDX is negotiated,
> > the pointer is to within the ring.
>
> Hmmm shame, it would be a nice optimization.
>
> Maybe it's time to introduce the v2 ring format, rather than
> adding more kludges to the existing one?
>
> Cheers,
> Jes

There has been discussion about a ring format that does not
use indexes at all. My guess is that would be a good point
for v2 ring format. But making that a product
and tuning might take a while. So definitely something to
keep in mind but I would not want that to block this optimization.

--
MST
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/