diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c index 216885aa3c..9d2d6a7af2 100644 --- a/hw/virtio/vhost-iova-tree.c +++ b/hw/virtio/vhost-iova-tree.c @@ -31,6 +31,9 @@ struct VhostIOVATree { /* Allocated IOVA addresses */ IOVATree *iova_map; + + /* GPA->IOVA address memory maps */ + IOVATree *gpa_iova_map; }; /** @@ -48,6 +51,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last) tree->iova_taddr_map = iova_tree_new(); tree->iova_map = iova_tree_new(); + tree->gpa_iova_map = gpa_tree_new(); return tree; } @@ -58,6 +62,7 @@ void vhost_iova_tree_delete(VhostIOVATree *iova_tree) { iova_tree_destroy(iova_tree->iova_taddr_map); iova_tree_destroy(iova_tree->iova_map); + iova_tree_destroy(iova_tree->gpa_iova_map); g_free(iova_tree); } @@ -122,3 +127,65 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) iova_tree_remove(iova_tree->iova_taddr_map, map); iova_tree_remove(iova_tree->iova_map, map); } + +/** + * Find the IOVA address stored from a guest memory address (GPA) + * + * @tree: The VhostIOVATree + * @map: The map with the guest memory address + * + * Returns the stored GPA->IOVA mapping, or NULL if not found. + */ +const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree, + const DMAMap *map) +{ + return iova_tree_find_iova(tree->gpa_iova_map, map); +} + +/** + * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree + * + * @tree: The VhostIOVATree + * @map: The IOVA mapping + * @taddr: The translated address (GPA) + * + * Returns: + * - IOVA_OK if the map fits both containers + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) + * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space + * + * It returns an assigned IOVA in map->iova if the return value is IOVA_OK. + */ +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr) +{ + int ret; + + /* Some vhost devices don't like addr 0. Skip first page */ + hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size(); + + if (taddr + map->size < taddr || map->perm == IOMMU_NONE) { + return IOVA_ERR_INVALID; + } + + /* Allocate a node in the IOVA-only tree */ + ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last); + if (unlikely(ret != IOVA_OK)) { + return ret; + } + + /* Insert a node in the GPA->IOVA tree */ + map->translated_addr = taddr; + return gpa_tree_insert(tree->gpa_iova_map, map); +} + +/** + * Remove existing mappings from the IOVA-only and GPA->IOVA trees + * + * @tree: The VhostIOVATree + * @map: The map to remove + */ +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map) +{ + iova_tree_remove(iova_tree->gpa_iova_map, map); + iova_tree_remove(iova_tree->iova_map, map); +} diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h index 525ce72b1d..0c4ba5abd5 100644 --- a/hw/virtio/vhost-iova-tree.h +++ b/hw/virtio/vhost-iova-tree.h @@ -24,5 +24,10 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map, hwaddr taddr); void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); +const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree, + const DMAMap *map); +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map, + hwaddr taddr); +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map); #endif diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 37aca8b431..30ba565f03 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -78,24 +78,39 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) * @vaddr: Translated IOVA addresses * @iovec: Source qemu's VA addresses * @num: Length of iovec and minimum length of vaddr + * @gpas: Descriptors' GPAs, if backed by guest memory */ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, hwaddr *addrs, const struct iovec *iovec, - size_t num) + size_t num, const hwaddr *gpas) { if (num == 0) { return true; } for (size_t i = 0; i < num; ++i) { - DMAMap needle = { - .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, - .size = iovec[i].iov_len, - }; Int128 needle_last, map_last; size_t off; + const DMAMap *map; + DMAMap needle; + + /* Check if the descriptor is backed by guest memory */ + if (gpas) { + /* Search the GPA->IOVA tree */ + needle = (DMAMap) { + .translated_addr = gpas[i], + .size = iovec[i].iov_len, + }; + map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle); + } else { + /* Search the IOVA->HVA tree */ + needle = (DMAMap) { + .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, + .size = iovec[i].iov_len, + }; + map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); + } - const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); /* * Map cannot be NULL since iova map contains all guest space and * qemu already has a physical address mapped @@ -130,6 +145,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, * @sg: Cache for hwaddr * @iovec: The iovec from the guest * @num: iovec length + * @addr: Descriptors' GPAs, if backed by guest memory * @more_descs: True if more descriptors come in the chain * @write: True if they are writeable descriptors * @@ -137,7 +153,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, */ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, const struct iovec *iovec, size_t num, - bool more_descs, bool write) + const hwaddr *addr, bool more_descs, + bool write) { uint16_t i = svq->free_head, last = svq->free_head; unsigned n; @@ -149,7 +166,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, return true; } - ok = vhost_svq_translate_addr(svq, sg, iovec, num); + ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr); if (unlikely(!ok)) { return false; } @@ -174,8 +191,9 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, const struct iovec *out_sg, size_t out_num, + const hwaddr *out_addr, const struct iovec *in_sg, size_t in_num, - unsigned *head) + const hwaddr *in_addr, unsigned *head) { unsigned avail_idx; vring_avail_t *avail = svq->vring.avail; @@ -191,13 +209,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, return false; } - ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, - false); + ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr, + in_num > 0, false); if (unlikely(!ok)) { return false; } - ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); + ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false, + true); if (unlikely(!ok)) { return false; } @@ -247,8 +266,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full */ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem) + size_t out_num, const hwaddr *out_addr, + const struct iovec *in_sg, size_t in_num, + const hwaddr *in_addr, VirtQueueElement *elem) { unsigned qemu_head; unsigned ndescs = in_num + out_num; @@ -258,7 +278,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, return -ENOSPC; } - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num, + in_addr, &qemu_head); if (unlikely(!ok)) { return -EINVAL; } @@ -274,8 +295,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, static int vhost_svq_add_element(VhostShadowVirtqueue *svq, VirtQueueElement *elem) { - return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, - elem->in_num, elem); + return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr, + elem->in_sg, elem->in_num, elem->in_addr, elem); } /** diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 19c842a15b..9c273739d6 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -118,8 +118,9 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq); void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const VirtQueueElement *elem, uint32_t len); int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem); + size_t out_num, const hwaddr *out_addr, + const struct iovec *in_sg, size_t in_num, + const hwaddr *in_addr, VirtQueueElement *elem); size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num); void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 703dcfc929..7efbde3d4c 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -360,17 +360,17 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (s->shadow_data) { int r; - hwaddr hw_vaddr = (hwaddr)(uintptr_t)vaddr; + hwaddr gpa = section->offset_within_address_space; mem_region.size = int128_get64(llsize) - 1, mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), - r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region, hw_vaddr); + r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); - if (mem_region.translated_addr == hw_vaddr) { - error_report("Insertion to IOVA->HVA tree failed"); + if (mem_region.translated_addr == gpa) { + error_report("Insertion to GPA->IOVA tree failed"); /* Remove the mapping from the IOVA-only tree */ goto fail_map; } @@ -392,7 +392,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, fail_map: if (s->shadow_data) { - vhost_iova_tree_remove(s->iova_tree, mem_region); + vhost_iova_tree_remove_gpa(s->iova_tree, mem_region); } fail: @@ -446,21 +446,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (s->shadow_data) { const DMAMap *result; - const void *vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + - (iova - section->offset_within_address_space); DMAMap mem_region = { - .translated_addr = (hwaddr)(uintptr_t)vaddr, + .translated_addr = section->offset_within_address_space, .size = int128_get64(llsize) - 1, }; - result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); + result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region); if (!result) { /* The memory listener map wasn't mapped */ return; } iova = result->iova; - vhost_iova_tree_remove(s->iova_tree, *result); + vhost_iova_tree_remove_gpa(s->iova_tree, *result); } vhost_vdpa_iotlb_batch_begin_once(s); /* diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h index 44a45931d5..16d354a814 100644 --- a/include/qemu/iova-tree.h +++ b/include/qemu/iova-tree.h @@ -40,6 +40,28 @@ typedef struct DMAMap { } QEMU_PACKED DMAMap; typedef gboolean (*iova_tree_iterator)(DMAMap *map); +/** + * gpa_tree_new: + * + * Create a new GPA->IOVA tree. + * + * Returns: the tree point on success, or NULL otherwise. + */ +IOVATree *gpa_tree_new(void); + +/** + * gpa_tree_insert: + * + * @tree: The GPA->IOVA tree we're inserting the mapping to + * @map: The GPA->IOVA mapping to insert + * + * Inserts a GPA range to the GPA->IOVA tree. If there are overlapped + * ranges, IOVA_ERR_OVERLAP will be returned. + * + * Return: 0 if successful, < 0 otherwise. + */ +int gpa_tree_insert(IOVATree *tree, const DMAMap *map); + /** * iova_tree_new: * diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 5a3a57203d..bd01866878 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -649,7 +649,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); int r; - r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL); + r = vhost_svq_add(svq, out_sg, out_num, NULL, in_sg, in_num, NULL, NULL); if (unlikely(r != 0)) { if (unlikely(r == -ENOSPC)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", diff --git a/util/iova-tree.c b/util/iova-tree.c index 06295e2755..5b0c95ff15 100644 --- a/util/iova-tree.c +++ b/util/iova-tree.c @@ -257,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree) g_tree_destroy(tree->tree); g_free(tree); } + +static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data) +{ + const DMAMap *m1 = a, *m2 = b; + + if (m1->translated_addr > m2->translated_addr + m2->size) { + return 1; + } + + if (m1->translated_addr + m1->size < m2->translated_addr) { + return -1; + } + + /* Overlapped */ + return 0; +} + +IOVATree *gpa_tree_new(void) +{ + IOVATree *gpa_tree = g_new0(IOVATree, 1); + + gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL); + + return gpa_tree; +} + +int gpa_tree_insert(IOVATree *tree, const DMAMap *map) +{ + DMAMap *new; + + if (map->translated_addr + map->size < map->translated_addr || + map->perm == IOMMU_NONE) { + return IOVA_ERR_INVALID; + } + + /* We don't allow inserting ranges that overlap with existing ones */ + if (iova_tree_find(tree, map)) { + return IOVA_ERR_OVERLAP; + } + + new = g_new0(DMAMap, 1); + memcpy(new, map, sizeof(*new)); + iova_tree_insert_internal(tree->tree, new); + + return IOVA_OK; +}