qemu/hw/virtio/vhost-shadow-virtqueue.h
Hawkins Jiawei 5d410557de vhost: fix possible wrap in SVQ descriptor ring
QEMU invokes vhost_svq_add() when adding a guest's element
into SVQ. In vhost_svq_add(), it uses vhost_svq_available_slots()
to check whether QEMU can add the element into SVQ. If there is
enough space, then QEMU combines some out descriptors and some
in descriptors into one descriptor chain, and adds it into
`svq->vring.desc` by vhost_svq_vring_write_descs().

Yet the problem is that, `svq->shadow_avail_idx - svq->shadow_used_idx`
in vhost_svq_available_slots() returns the number of occupied elements,
or the number of descriptor chains, instead of the number of occupied
descriptors, which may cause wrapping in SVQ descriptor ring.

Here is an example. In vhost_handle_guest_kick(), QEMU forwards
as many available buffers to device by virtqueue_pop() and
vhost_svq_add_element(). virtqueue_pop() returns a guest's element,
and then this element is added into SVQ by vhost_svq_add_element(),
a wrapper to vhost_svq_add(). If QEMU invokes virtqueue_pop() and
vhost_svq_add_element() `svq->vring.num` times,
vhost_svq_available_slots() thinks QEMU just ran out of slots and
everything should work fine. But in fact, virtqueue_pop() returns
`svq->vring.num` elements or descriptor chains, more than
`svq->vring.num` descriptors due to guest memory fragmentation,
and this causes wrapping in SVQ descriptor ring.

This bug is valid even before marking the descriptors used.
If the guest memory is fragmented, SVQ must add chains
so it can try to add more descriptors than possible.

This patch solves it by adding `num_free` field in
VhostShadowVirtqueue structure and updating this field
in vhost_svq_add() and vhost_svq_get_buf(), to record
the number of free descriptors.

Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230509084817.3973-1-yin31149@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
2023-05-19 01:36:09 -04:00

142 lines
4.2 KiB
C

/*
* vhost shadow virtqueue
*
* SPDX-FileCopyrightText: Red Hat, Inc. 2021
* SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef VHOST_SHADOW_VIRTQUEUE_H
#define VHOST_SHADOW_VIRTQUEUE_H
#include "qemu/event_notifier.h"
#include "hw/virtio/virtio.h"
#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/vhost-iova-tree.h"
typedef struct SVQDescState {
VirtQueueElement *elem;
/*
* Number of descriptors exposed to the device. May or may not match
* guest's
*/
unsigned int ndescs;
} SVQDescState;
typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
/**
* Callback to handle an avail buffer.
*
* @svq: Shadow virtqueue
* @elem: Element placed in the queue by the guest
* @vq_callback_opaque: Opaque
*
* Returns 0 if the vq is running as expected.
*
* Note that ownership of elem is transferred to the callback.
*/
typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
VirtQueueElement *elem,
void *vq_callback_opaque);
typedef struct VhostShadowVirtqueueOps {
VirtQueueAvailCallback avail_handler;
} VhostShadowVirtqueueOps;
/* Shadow virtqueue to relay notifications */
typedef struct VhostShadowVirtqueue {
/* Shadow vring */
struct vring vring;
/* Shadow kick notifier, sent to vhost */
EventNotifier hdev_kick;
/* Shadow call notifier, sent to vhost */
EventNotifier hdev_call;
/*
* Borrowed virtqueue's guest to host notifier. To borrow it in this event
* notifier allows to recover the VhostShadowVirtqueue from the event loop
* easily. If we use the VirtQueue's one, we don't have an easy way to
* retrieve VhostShadowVirtqueue.
*
* So shadow virtqueue must not clean it, or we would lose VirtQueue one.
*/
EventNotifier svq_kick;
/* Guest's call notifier, where the SVQ calls guest. */
EventNotifier svq_call;
/* Virtio queue shadowing */
VirtQueue *vq;
/* Virtio device */
VirtIODevice *vdev;
/* IOVA mapping */
VhostIOVATree *iova_tree;
/* SVQ vring descriptors state */
SVQDescState *desc_state;
/* Next VirtQueue element that guest made available */
VirtQueueElement *next_guest_avail_elem;
/*
* Backup next field for each descriptor so we can recover securely, not
* needing to trust the device access.
*/
uint16_t *desc_next;
/* Caller callbacks */
const VhostShadowVirtqueueOps *ops;
/* Caller callbacks opaque */
void *ops_opaque;
/* Next head to expose to the device */
uint16_t shadow_avail_idx;
/* Next free descriptor */
uint16_t free_head;
/* Last seen used idx */
uint16_t shadow_used_idx;
/* Next head to consume from the device */
uint16_t last_used_idx;
/* Size of SVQ vring free descriptors */
uint16_t num_free;
} VhostShadowVirtqueue;
bool vhost_svq_valid_features(uint64_t features, Error **errp);
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
const VirtQueueElement *elem, uint32_t len);
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
size_t out_num, const struct iovec *in_sg, size_t in_num,
VirtQueueElement *elem);
size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
struct vhost_vring_addr *addr);
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
VirtQueue *vq, VhostIOVATree *iova_tree);
void vhost_svq_stop(VhostShadowVirtqueue *svq);
VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
void *ops_opaque);
void vhost_svq_free(gpointer vq);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
#endif