 bed150be5b
			
		
	
	
		bed150be5b
		
	
	
	
	
		
			
			In net_init_af_xdp() we parse the arguments and allocate a buffer of ints into sock_fds. However, although we free this in the error exit path, we don't ever free it in the successful return path. Coverity spots this leak. Switch to g_autofree so we don't need to manually free the array. Resolves: Coverity CID 1534906 Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Thomas Huth <thuth@redhat.com> Message-id: 20240312183810.557768-4-peter.maydell@linaro.org
		
			
				
	
	
		
			525 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			525 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * AF_XDP network backend.
 | |
|  *
 | |
|  * Copyright (c) 2023 Red Hat, Inc.
 | |
|  *
 | |
|  * Authors:
 | |
|  *  Ilya Maximets <i.maximets@ovn.org>
 | |
|  *
 | |
|  * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | |
|  * See the COPYING file in the top-level directory.
 | |
|  */
 | |
| 
 | |
| 
 | |
| #include "qemu/osdep.h"
 | |
| #include <bpf/bpf.h>
 | |
| #include <linux/if_link.h>
 | |
| #include <linux/if_xdp.h>
 | |
| #include <net/if.h>
 | |
| #include <xdp/xsk.h>
 | |
| 
 | |
| #include "clients.h"
 | |
| #include "monitor/monitor.h"
 | |
| #include "net/net.h"
 | |
| #include "qapi/error.h"
 | |
| #include "qemu/cutils.h"
 | |
| #include "qemu/error-report.h"
 | |
| #include "qemu/iov.h"
 | |
| #include "qemu/main-loop.h"
 | |
| #include "qemu/memalign.h"
 | |
| 
 | |
| 
 | |
/*
 * Per-queue state of the af-xdp network backend.  One instance exists per
 * device queue (see the queue loop in net_init_af_xdp()).
 */
typedef struct AFXDPState {
    NetClientState       nc;

    struct xsk_socket    *xsk;  /* AF_XDP socket bound to one device queue */
    struct xsk_ring_cons rx;    /* Rx ring: frames received from the kernel */
    struct xsk_ring_prod tx;    /* Tx ring: frames submitted for transmit */
    struct xsk_ring_cons cq;    /* Completion queue for finished transmits */
    struct xsk_ring_prod fq;    /* Fill queue: free frames handed to kernel */

    char                 ifname[IFNAMSIZ]; /* target interface name */
    int                  ifindex;          /* ifindex of ifname */
    bool                 read_poll;        /* read handler registered */
    bool                 write_poll;       /* write handler registered */
    uint32_t             outstanding_tx;   /* submitted but not completed */

    uint64_t             *pool;   /* LIFO stack of free umem frame addresses */
    uint32_t             n_pool;  /* number of entries currently in pool */
    char                 *buffer; /* backing memory registered as the umem */
    struct xsk_umem      *umem;

    uint32_t             n_queues;  /* total queues; last one detaches XDP */
    uint32_t             xdp_flags; /* flags the XDP program attached with */
    bool                 inhibit;   /* don't load/remove XDP program here */
} AFXDPState;
 | |
| 
 | |
/* Maximum number of Rx descriptors processed per af_xdp_send() pass. */
#define AF_XDP_BATCH_SIZE 64

/* Forward declarations for the fd read/write callbacks. */
static void af_xdp_send(void *opaque);
static void af_xdp_writable(void *opaque);
 | |
| 
 | |
| /* Set the event-loop handlers for the af-xdp backend. */
 | |
| static void af_xdp_update_fd_handler(AFXDPState *s)
 | |
| {
 | |
|     qemu_set_fd_handler(xsk_socket__fd(s->xsk),
 | |
|                         s->read_poll ? af_xdp_send : NULL,
 | |
|                         s->write_poll ? af_xdp_writable : NULL,
 | |
|                         s);
 | |
| }
 | |
| 
 | |
| /* Update the read handler. */
 | |
| static void af_xdp_read_poll(AFXDPState *s, bool enable)
 | |
| {
 | |
|     if (s->read_poll != enable) {
 | |
|         s->read_poll = enable;
 | |
|         af_xdp_update_fd_handler(s);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Update the write handler. */
 | |
| static void af_xdp_write_poll(AFXDPState *s, bool enable)
 | |
| {
 | |
|     if (s->write_poll != enable) {
 | |
|         s->write_poll = enable;
 | |
|         af_xdp_update_fd_handler(s);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void af_xdp_poll(NetClientState *nc, bool enable)
 | |
| {
 | |
|     AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
 | |
| 
 | |
|     if (s->read_poll != enable || s->write_poll != enable) {
 | |
|         s->write_poll = enable;
 | |
|         s->read_poll  = enable;
 | |
|         af_xdp_update_fd_handler(s);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void af_xdp_complete_tx(AFXDPState *s)
 | |
| {
 | |
|     uint32_t idx = 0;
 | |
|     uint32_t done, i;
 | |
|     uint64_t *addr;
 | |
| 
 | |
|     done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx);
 | |
| 
 | |
|     for (i = 0; i < done; i++) {
 | |
|         addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++);
 | |
|         s->pool[s->n_pool++] = *addr;
 | |
|         s->outstanding_tx--;
 | |
|     }
 | |
| 
 | |
|     if (done) {
 | |
|         xsk_ring_cons__release(&s->cq, done);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * The fd_write() callback, invoked if the fd is marked as writable
 | |
|  * after a poll.
 | |
|  */
 | |
| static void af_xdp_writable(void *opaque)
 | |
| {
 | |
|     AFXDPState *s = opaque;
 | |
| 
 | |
|     /* Try to recover buffers that are already sent. */
 | |
|     af_xdp_complete_tx(s);
 | |
| 
 | |
|     /*
 | |
|      * Unregister the handler, unless we still have packets to transmit
 | |
|      * and kernel needs a wake up.
 | |
|      */
 | |
|     if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) {
 | |
|         af_xdp_write_poll(s, false);
 | |
|     }
 | |
| 
 | |
|     /* Flush any buffered packets. */
 | |
|     qemu_flush_queued_packets(&s->nc);
 | |
| }
 | |
| 
 | |
| static ssize_t af_xdp_receive(NetClientState *nc,
 | |
|                               const uint8_t *buf, size_t size)
 | |
| {
 | |
|     AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
 | |
|     struct xdp_desc *desc;
 | |
|     uint32_t idx;
 | |
|     void *data;
 | |
| 
 | |
|     /* Try to recover buffers that are already sent. */
 | |
|     af_xdp_complete_tx(s);
 | |
| 
 | |
|     if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) {
 | |
|         /* We can't transmit packet this size... */
 | |
|         return size;
 | |
|     }
 | |
| 
 | |
|     if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) {
 | |
|         /*
 | |
|          * Out of buffers or space in tx ring.  Poll until we can write.
 | |
|          * This will also kick the Tx, if it was waiting on CQ.
 | |
|          */
 | |
|         af_xdp_write_poll(s, true);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     desc = xsk_ring_prod__tx_desc(&s->tx, idx);
 | |
|     desc->addr = s->pool[--s->n_pool];
 | |
|     desc->len = size;
 | |
| 
 | |
|     data = xsk_umem__get_data(s->buffer, desc->addr);
 | |
|     memcpy(data, buf, size);
 | |
| 
 | |
|     xsk_ring_prod__submit(&s->tx, 1);
 | |
|     s->outstanding_tx++;
 | |
| 
 | |
|     if (xsk_ring_prod__needs_wakeup(&s->tx)) {
 | |
|         af_xdp_write_poll(s, true);
 | |
|     }
 | |
| 
 | |
|     return size;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Complete a previous send (backend --> guest) and enable the
 | |
|  * fd_read callback.
 | |
|  */
 | |
| static void af_xdp_send_completed(NetClientState *nc, ssize_t len)
 | |
| {
 | |
|     AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
 | |
| 
 | |
|     af_xdp_read_poll(s, true);
 | |
| }
 | |
| 
 | |
| static void af_xdp_fq_refill(AFXDPState *s, uint32_t n)
 | |
| {
 | |
|     uint32_t i, idx = 0;
 | |
| 
 | |
|     /* Leave one packet for Tx, just in case. */
 | |
|     if (s->n_pool < n + 1) {
 | |
|         n = s->n_pool;
 | |
|     }
 | |
| 
 | |
|     if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < n; i++) {
 | |
|         *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool];
 | |
|     }
 | |
|     xsk_ring_prod__submit(&s->fq, n);
 | |
| 
 | |
|     if (xsk_ring_prod__needs_wakeup(&s->fq)) {
 | |
|         /* Receive was blocked by not having enough buffers.  Wake it up. */
 | |
|         af_xdp_read_poll(s, true);
 | |
|     }
 | |
| }
 | |
| 
 | |
/*
 * The fd_read() callback: drain up to AF_XDP_BATCH_SIZE packets from the
 * Rx ring and forward each one to the peer.
 */
static void af_xdp_send(void *opaque)
{
    uint32_t i, n_rx, idx = 0;
    AFXDPState *s = opaque;

    n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx);
    if (!n_rx) {
        return;
    }

    for (i = 0; i < n_rx; i++) {
        const struct xdp_desc *desc;
        struct iovec iov;

        desc = xsk_ring_cons__rx_desc(&s->rx, idx++);

        iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr);
        iov.iov_len = desc->len;

        /*
         * The frame address is returned to the free pool before the send;
         * NOTE(review): this assumes qemu_sendv_packet_async() copies the
         * payload before we release the descriptor below — confirm.
         */
        s->pool[s->n_pool++] = desc->addr;

        if (!qemu_sendv_packet_async(&s->nc, &iov, 1,
                                     af_xdp_send_completed)) {
            /*
             * The peer does not receive anymore.  Packet is queued, stop
             * reading from the backend until af_xdp_send_completed().
             */
            af_xdp_read_poll(s, false);

            /* Return unused descriptors to not break the ring cache. */
            xsk_ring_cons__cancel(&s->rx, n_rx - i - 1);
            /* Only the descriptors consumed so far get released below. */
            n_rx = i + 1;
            break;
        }
    }

    /* Release actually sent descriptors and try to re-fill. */
    xsk_ring_cons__release(&s->rx, n_rx);
    af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE);
}
 | |
| 
 | |
/* Flush and close. */
static void af_xdp_cleanup(NetClientState *nc)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);

    qemu_purge_queued_packets(nc);

    /* Unregister both fd handlers before tearing the socket down. */
    af_xdp_poll(nc, false);

    /* Teardown order: socket first, then pool, umem, backing buffer. */
    xsk_socket__delete(s->xsk);
    s->xsk = NULL;
    g_free(s->pool);
    s->pool = NULL;
    xsk_umem__delete(s->umem);
    s->umem = NULL;
    qemu_vfree(s->buffer);
    s->buffer = NULL;

    /* Remove the program if it's the last open queue. */
    if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags
        && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) {
        fprintf(stderr,
                "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n",
                s->ifname, s->ifindex);
    }
}
 | |
| 
 | |
| static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
 | |
| {
 | |
|     struct xsk_umem_config config = {
 | |
|         .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 | |
|         .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 | |
|         .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
 | |
|         .frame_headroom = 0,
 | |
|     };
 | |
|     uint64_t n_descs;
 | |
|     uint64_t size;
 | |
|     int64_t i;
 | |
|     int ret;
 | |
| 
 | |
|     /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */
 | |
|     n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS
 | |
|                + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2;
 | |
|     size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE;
 | |
| 
 | |
|     s->buffer = qemu_memalign(qemu_real_host_page_size(), size);
 | |
|     memset(s->buffer, 0, size);
 | |
| 
 | |
|     if (sock_fd < 0) {
 | |
|         ret = xsk_umem__create(&s->umem, s->buffer, size,
 | |
|                                &s->fq, &s->cq, &config);
 | |
|     } else {
 | |
|         ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size,
 | |
|                                        &s->fq, &s->cq, &config);
 | |
|     }
 | |
| 
 | |
|     if (ret) {
 | |
|         qemu_vfree(s->buffer);
 | |
|         error_setg_errno(errp, errno,
 | |
|                          "failed to create umem for %s queue_index: %d",
 | |
|                          s->ifname, s->nc.queue_index);
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     s->pool = g_new(uint64_t, n_descs);
 | |
|     /* Fill the pool in the opposite order, because it's a LIFO queue. */
 | |
|     for (i = n_descs; i >= 0; i--) {
 | |
|         s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
 | |
|     }
 | |
|     s->n_pool = n_descs;
 | |
| 
 | |
|     af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS);
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
/*
 * Create and bind the AF_XDP socket for this queue.  If no mode is
 * requested, native (driver) mode is tried first with a fallback to
 * skb (generic) mode.
 *
 * Returns 0 on success, -1 with @errp set on failure.
 */
static int af_xdp_socket_create(AFXDPState *s,
                                const NetdevAFXDPOptions *opts, Error **errp)
{
    struct xsk_socket_config cfg = {
        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
        .libxdp_flags = 0,
        .bind_flags = XDP_USE_NEED_WAKEUP,
        .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
    };
    int queue_id, error = 0;

    /* With inhibit=on, libxdp must not load an XDP program itself. */
    s->inhibit = opts->has_inhibit && opts->inhibit;
    if (s->inhibit) {
        cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
    }

    if (opts->has_force_copy && opts->force_copy) {
        cfg.bind_flags |= XDP_COPY;
    }

    /* Map this client's queue index onto the device queue range. */
    queue_id = s->nc.queue_index;
    if (opts->has_start_queue && opts->start_queue > 0) {
        queue_id += opts->start_queue;
    }

    if (opts->has_mode) {
        /* Specific mode requested. */
        cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE)
                         ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                               s->umem, &s->rx, &s->tx, &cfg)) {
            error = errno;
        }
    } else {
        /* No mode requested, try native first. */
        cfg.xdp_flags |= XDP_FLAGS_DRV_MODE;

        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                               s->umem, &s->rx, &s->tx, &cfg)) {
            /* Can't use native mode, try skb. */
            cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE;
            cfg.xdp_flags |= XDP_FLAGS_SKB_MODE;

            if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                                   s->umem, &s->rx, &s->tx, &cfg)) {
                error = errno;
            }
        }
    }

    if (error) {
        error_setg_errno(errp, error,
                         "failed to create AF_XDP socket for %s queue_id: %d",
                         s->ifname, queue_id);
        return -1;
    }

    /* Remember the flags so cleanup can detach the same program. */
    s->xdp_flags = cfg.xdp_flags;

    return 0;
}
 | |
| 
 | |
/* NetClientInfo methods. */
static NetClientInfo net_af_xdp_info = {
    .type = NET_CLIENT_DRIVER_AF_XDP,
    .size = sizeof(AFXDPState),
    .receive = af_xdp_receive, /* peer --> backend transmit path */
    .poll = af_xdp_poll,
    .cleanup = af_xdp_cleanup,
};
 | |
| 
 | |
| static int *parse_socket_fds(const char *sock_fds_str,
 | |
|                              int64_t n_expected, Error **errp)
 | |
| {
 | |
|     gchar **substrings = g_strsplit(sock_fds_str, ":", -1);
 | |
|     int64_t i, n_sock_fds = g_strv_length(substrings);
 | |
|     int *sock_fds = NULL;
 | |
| 
 | |
|     if (n_sock_fds != n_expected) {
 | |
|         error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64,
 | |
|                    n_expected, n_sock_fds);
 | |
|         goto exit;
 | |
|     }
 | |
| 
 | |
|     sock_fds = g_new(int, n_sock_fds);
 | |
| 
 | |
|     for (i = 0; i < n_sock_fds; i++) {
 | |
|         sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp);
 | |
|         if (sock_fds[i] < 0) {
 | |
|             g_free(sock_fds);
 | |
|             sock_fds = NULL;
 | |
|             goto exit;
 | |
|         }
 | |
|     }
 | |
| 
 | |
| exit:
 | |
|     g_strfreev(substrings);
 | |
|     return sock_fds;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * The exported init function.
 | |
|  *
 | |
|  * ... -netdev af-xdp,ifname="..."
 | |
|  */
 | |
| int net_init_af_xdp(const Netdev *netdev,
 | |
|                     const char *name, NetClientState *peer, Error **errp)
 | |
| {
 | |
|     const NetdevAFXDPOptions *opts = &netdev->u.af_xdp;
 | |
|     NetClientState *nc, *nc0 = NULL;
 | |
|     unsigned int ifindex;
 | |
|     uint32_t prog_id = 0;
 | |
|     g_autofree int *sock_fds = NULL;
 | |
|     int64_t i, queues;
 | |
|     Error *err = NULL;
 | |
|     AFXDPState *s;
 | |
| 
 | |
|     ifindex = if_nametoindex(opts->ifname);
 | |
|     if (!ifindex) {
 | |
|         error_setg_errno(errp, errno, "failed to get ifindex for '%s'",
 | |
|                          opts->ifname);
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     queues = opts->has_queues ? opts->queues : 1;
 | |
|     if (queues < 1) {
 | |
|         error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'",
 | |
|                    queues, opts->ifname);
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
 | |
|         error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     if (opts->sock_fds) {
 | |
|         sock_fds = parse_socket_fds(opts->sock_fds, queues, errp);
 | |
|         if (!sock_fds) {
 | |
|             return -1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < queues; i++) {
 | |
|         nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name);
 | |
|         qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname);
 | |
|         nc->queue_index = i;
 | |
| 
 | |
|         if (!nc0) {
 | |
|             nc0 = nc;
 | |
|         }
 | |
| 
 | |
|         s = DO_UPCAST(AFXDPState, nc, nc);
 | |
| 
 | |
|         pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
 | |
|         s->ifindex = ifindex;
 | |
|         s->n_queues = queues;
 | |
| 
 | |
|         if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
 | |
|             || af_xdp_socket_create(s, opts, errp)) {
 | |
|             /* Make sure the XDP program will be removed. */
 | |
|             s->n_queues = i;
 | |
|             error_propagate(errp, err);
 | |
|             goto err;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (nc0) {
 | |
|         s = DO_UPCAST(AFXDPState, nc, nc0);
 | |
|         if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) {
 | |
|             error_setg_errno(errp, errno,
 | |
|                              "no XDP program loaded on '%s', ifindex: %d",
 | |
|                              s->ifname, s->ifindex);
 | |
|             goto err;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     af_xdp_read_poll(s, true); /* Initially only poll for reads. */
 | |
| 
 | |
|     return 0;
 | |
| 
 | |
| err:
 | |
|     if (nc0) {
 | |
|         qemu_del_net_client(nc0);
 | |
|     }
 | |
| 
 | |
|     return -1;
 | |
| }
 |