From ab1cb3683bd0462695a75aa9a8c1d07731caf304 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 5 Feb 2025 13:13:53 -0300 Subject: [PATCH 01/22] crypto: Allow gracefully ending the TLS session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU's TLS session code provides no way to call gnutls_bye() to terminate a TLS session. Callers of qcrypto_tls_session_read() can choose to ignore a GNUTLS_E_PREMATURE_TERMINATION error by setting the gracefulTermination argument. The QIOChannelTLS ignores the premature termination error whenever shutdown() has already been issued. This was found to be not enough for the migration code because shutdown() might not have been issued before the connection is terminated. Add support for calling gnutls_bye() in the tlssession layer so users of QIOChannelTLS can clearly identify the end of a TLS session. Reviewed-by: Daniel P. Berrangé Acked-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas --- crypto/tlssession.c | 41 +++++++++++++++++++++++++++++++++++++ include/crypto/tlssession.h | 22 ++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/crypto/tlssession.c b/crypto/tlssession.c index 77286e23f4..d769d7a304 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -585,6 +585,40 @@ qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *session) } } +int +qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp) +{ + int ret; + + if (!session->handshakeComplete) { + return 0; + } + + ret = gnutls_bye(session->handle, GNUTLS_SHUT_WR); + + if (!ret) { + return QCRYPTO_TLS_BYE_COMPLETE; + } + + if (ret == GNUTLS_E_INTERRUPTED || ret == GNUTLS_E_AGAIN) { + int direction = gnutls_record_get_direction(session->handle); + return direction ? QCRYPTO_TLS_BYE_SENDING : QCRYPTO_TLS_BYE_RECVING; + } + + if (session->rerr || session->werr) { + error_setg(errp, "TLS termination failed: %s: %s", gnutls_strerror(ret), + error_get_pretty(session->rerr ? + session->rerr : session->werr)); + } else { + error_setg(errp, "TLS termination failed: %s", gnutls_strerror(ret)); + } + + error_free(session->rerr); + error_free(session->werr); + session->rerr = session->werr = NULL; + + return -1; +} int qcrypto_tls_session_get_key_size(QCryptoTLSSession *session, @@ -699,6 +733,13 @@ qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess) } +int +qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp) +{ + return QCRYPTO_TLS_BYE_COMPLETE; +} + + int qcrypto_tls_session_get_key_size(QCryptoTLSSession *sess, Error **errp) diff --git a/include/crypto/tlssession.h b/include/crypto/tlssession.h index f694a5c3c5..c0f64ce989 100644 --- a/include/crypto/tlssession.h +++ b/include/crypto/tlssession.h @@ -323,6 +323,28 @@ typedef enum { QCryptoTLSSessionHandshakeStatus qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess); +typedef enum { + QCRYPTO_TLS_BYE_COMPLETE, + QCRYPTO_TLS_BYE_SENDING, + QCRYPTO_TLS_BYE_RECVING, +} QCryptoTLSSessionByeStatus; + +/** + * qcrypto_tls_session_bye: + * @session: the TLS session object + * @errp: pointer to a NULL-initialized error object + * + * Start, or continue, a TLS termination sequence. If the underlying + * data channel is non-blocking, then this method may return control + * before the termination is complete. The return value will indicate + * whether the termination has completed, or is waiting to send or + * receive data. In the latter cases, the caller should setup an event + * loop watch and call this method again once the underlying data + * channel is ready to read or write again. + */ +int +qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp); + /** * qcrypto_tls_session_get_key_size: * @sess: the TLS session object From 30ee88622edfa962154222b4a674361488ed823b Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 5 Feb 2025 13:15:00 -0300 Subject: [PATCH 02/22] io: tls: Add qio_channel_tls_bye MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a task dispatcher for gnutls_bye similar to the qio_channel_tls_handshake_task(). The gnutls_bye() call might be interrupted and so it needs to be rescheduled. The migration code will make use of this to help the migration destination identify a premature EOF. Once the session termination is in place, any EOF that happens before the source issued gnutls_bye() will be considered an error. Reviewed-by: Daniel P. Berrangé Acked-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas --- include/io/channel-tls.h | 12 ++++++ io/channel-tls.c | 84 ++++++++++++++++++++++++++++++++++++++++ io/trace-events | 5 +++ 3 files changed, 101 insertions(+) diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h index 26c67f17e2..7e9023570d 100644 --- a/include/io/channel-tls.h +++ b/include/io/channel-tls.h @@ -49,8 +49,20 @@ struct QIOChannelTLS { QCryptoTLSSession *session; QIOChannelShutdown shutdown; guint hs_ioc_tag; + guint bye_ioc_tag; }; +/** + * qio_channel_tls_bye: + * @ioc: the TLS channel object + * @errp: pointer to a NULL-initialized error object + * + * Perform the TLS session termination. This method will return + * immediately and the termination will continue in the background, + * provided the main loop is running. + */ +void qio_channel_tls_bye(QIOChannelTLS *ioc, Error **errp); + /** * qio_channel_tls_new_server: * @master: the underlying channel object diff --git a/io/channel-tls.c b/io/channel-tls.c index aab630e5ae..517ce190a4 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -247,6 +247,85 @@ void qio_channel_tls_handshake(QIOChannelTLS *ioc, qio_channel_tls_handshake_task(ioc, task, context); } +static gboolean qio_channel_tls_bye_io(QIOChannel *ioc, GIOCondition condition, + gpointer user_data); + +static void qio_channel_tls_bye_task(QIOChannelTLS *ioc, QIOTask *task, + GMainContext *context) +{ + GIOCondition condition; + QIOChannelTLSData *data; + int status; + Error *err = NULL; + + status = qcrypto_tls_session_bye(ioc->session, &err); + + if (status < 0) { + trace_qio_channel_tls_bye_fail(ioc); + qio_task_set_error(task, err); + qio_task_complete(task); + return; + } + + if (status == QCRYPTO_TLS_BYE_COMPLETE) { + qio_task_complete(task); + return; + } + + data = g_new0(typeof(*data), 1); + data->task = task; + data->context = context; + + if (context) { + g_main_context_ref(context); + } + + if (status == QCRYPTO_TLS_BYE_SENDING) { + condition = G_IO_OUT; + } else { + condition = G_IO_IN; + } + + trace_qio_channel_tls_bye_pending(ioc, status); + ioc->bye_ioc_tag = qio_channel_add_watch_full(ioc->master, condition, + qio_channel_tls_bye_io, + data, NULL, context); +} + + +static gboolean qio_channel_tls_bye_io(QIOChannel *ioc, GIOCondition condition, + gpointer user_data) +{ + QIOChannelTLSData *data = user_data; + QIOTask *task = data->task; + GMainContext *context = data->context; + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(qio_task_get_source(task)); + + tioc->bye_ioc_tag = 0; + g_free(data); + qio_channel_tls_bye_task(tioc, task, context); + + if (context) { + g_main_context_unref(context); + } + + return FALSE; +} + +static void propagate_error(QIOTask *task, gpointer opaque) +{ + qio_task_propagate_error(task, opaque); +} + +void qio_channel_tls_bye(QIOChannelTLS *ioc, Error **errp) +{ + QIOTask *task; + + task = qio_task_new(OBJECT(ioc), propagate_error, errp, NULL); + + trace_qio_channel_tls_bye_start(ioc); + qio_channel_tls_bye_task(ioc, task, NULL); +} static void qio_channel_tls_init(Object *obj G_GNUC_UNUSED) { @@ -379,6 +458,11 @@ static int qio_channel_tls_close(QIOChannel *ioc, g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove); } + if (tioc->bye_ioc_tag) { + trace_qio_channel_tls_bye_cancel(ioc); + g_clear_handle_id(&tioc->bye_ioc_tag, g_source_remove); + } + return qio_channel_close(tioc->master, errp); } diff --git a/io/trace-events b/io/trace-events index d4c0f84a9a..dc3a63ba1f 100644 --- a/io/trace-events +++ b/io/trace-events @@ -44,6 +44,11 @@ qio_channel_tls_handshake_pending(void *ioc, int status) "TLS handshake pending qio_channel_tls_handshake_fail(void *ioc) "TLS handshake fail ioc=%p" qio_channel_tls_handshake_complete(void *ioc) "TLS handshake complete ioc=%p" qio_channel_tls_handshake_cancel(void *ioc) "TLS handshake cancel ioc=%p" +qio_channel_tls_bye_start(void *ioc) "TLS termination start ioc=%p" +qio_channel_tls_bye_pending(void *ioc, int status) "TLS termination pending ioc=%p status=%d" +qio_channel_tls_bye_fail(void *ioc) "TLS termination fail ioc=%p" +qio_channel_tls_bye_complete(void *ioc) "TLS termination complete ioc=%p" +qio_channel_tls_bye_cancel(void *ioc) "TLS termination cancel ioc=%p" qio_channel_tls_credentials_allow(void *ioc) "TLS credentials allow ioc=%p" qio_channel_tls_credentials_deny(void *ioc) "TLS credentials deny ioc=%p" From 0b8a70d70f65fbcf3ad62c975a64a356779095a9 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 6 Feb 2025 15:34:08 -0300 Subject: [PATCH 03/22] crypto: Remove qcrypto_tls_session_get_handshake_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The correct way of calling qcrypto_tls_session_handshake() requires calling qcrypto_tls_session_get_handshake_status() right after it so there's no reason to have a separate method. Refactor qcrypto_tls_session_handshake() to inform the status in its own return value and alter the callers accordingly. No functional change. Suggested-by: Daniel P. Berrangé Reviewed-by: Daniel P. Berrangé Acked-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas --- crypto/tlssession.c | 57 ++++++++++------------------- include/crypto/tlssession.h | 32 ++++------------ io/channel-tls.c | 7 ++-- tests/unit/test-crypto-tlssession.c | 12 ++---- 4 files changed, 36 insertions(+), 72 deletions(-) diff --git a/crypto/tlssession.c b/crypto/tlssession.c index d769d7a304..6d8f8df623 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -546,45 +546,35 @@ qcrypto_tls_session_handshake(QCryptoTLSSession *session, Error **errp) { int ret = gnutls_handshake(session->handle); - if (ret == 0) { + if (!ret) { session->handshakeComplete = true; - } else { - if (ret == GNUTLS_E_INTERRUPTED || - ret == GNUTLS_E_AGAIN) { - ret = 1; - } else { - if (session->rerr || session->werr) { - error_setg(errp, "TLS handshake failed: %s: %s", - gnutls_strerror(ret), - error_get_pretty(session->rerr ? - session->rerr : session->werr)); - } else { - error_setg(errp, "TLS handshake failed: %s", - gnutls_strerror(ret)); - } - ret = -1; - } + return QCRYPTO_TLS_HANDSHAKE_COMPLETE; } + + if (ret == GNUTLS_E_INTERRUPTED || ret == GNUTLS_E_AGAIN) { + int direction = gnutls_record_get_direction(session->handle); + return direction ? QCRYPTO_TLS_HANDSHAKE_SENDING : + QCRYPTO_TLS_HANDSHAKE_RECVING; + } + + if (session->rerr || session->werr) { + error_setg(errp, "TLS handshake failed: %s: %s", + gnutls_strerror(ret), + error_get_pretty(session->rerr ? + session->rerr : session->werr)); + } else { + error_setg(errp, "TLS handshake failed: %s", + gnutls_strerror(ret)); + } + error_free(session->rerr); error_free(session->werr); session->rerr = session->werr = NULL; - return ret; + return -1; } -QCryptoTLSSessionHandshakeStatus -qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *session) -{ - if (session->handshakeComplete) { - return QCRYPTO_TLS_HANDSHAKE_COMPLETE; - } else if (gnutls_record_get_direction(session->handle) == 0) { - return QCRYPTO_TLS_HANDSHAKE_RECVING; - } else { - return QCRYPTO_TLS_HANDSHAKE_SENDING; - } -} - int qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp) { @@ -726,13 +716,6 @@ qcrypto_tls_session_handshake(QCryptoTLSSession *sess, } -QCryptoTLSSessionHandshakeStatus -qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess) -{ - return QCRYPTO_TLS_HANDSHAKE_COMPLETE; -} - - int qcrypto_tls_session_bye(QCryptoTLSSession *session, Error **errp) { diff --git a/include/crypto/tlssession.h b/include/crypto/tlssession.h index c0f64ce989..d77ae0d423 100644 --- a/include/crypto/tlssession.h +++ b/include/crypto/tlssession.h @@ -75,12 +75,14 @@ * GINT_TO_POINTER(fd)); * * while (1) { - * if (qcrypto_tls_session_handshake(sess, errp) < 0) { + * int ret = qcrypto_tls_session_handshake(sess, errp); + * + * if (ret < 0) { * qcrypto_tls_session_free(sess); * return -1; * } * - * switch(qcrypto_tls_session_get_handshake_status(sess)) { + * switch(ret) { * case QCRYPTO_TLS_HANDSHAKE_COMPLETE: * if (qcrypto_tls_session_check_credentials(sess, errp) < )) { * qcrypto_tls_session_free(sess); @@ -170,7 +172,7 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoTLSSession, qcrypto_tls_session_free) * * Validate the peer's credentials after a successful * TLS handshake. It is an error to call this before - * qcrypto_tls_session_get_handshake_status() returns + * qcrypto_tls_session_handshake() returns * QCRYPTO_TLS_HANDSHAKE_COMPLETE * * Returns 0 if the credentials validated, -1 on error @@ -226,7 +228,7 @@ void qcrypto_tls_session_set_callbacks(QCryptoTLSSession *sess, * registered with qcrypto_tls_session_set_callbacks() * * It is an error to call this before - * qcrypto_tls_session_get_handshake_status() returns + * qcrypto_tls_session_handshake() returns * QCRYPTO_TLS_HANDSHAKE_COMPLETE * * Returns: the number of bytes sent, @@ -256,7 +258,7 @@ ssize_t qcrypto_tls_session_write(QCryptoTLSSession *sess, * opposed to an error. * * It is an error to call this before - * qcrypto_tls_session_get_handshake_status() returns + * qcrypto_tls_session_handshake() returns * QCRYPTO_TLS_HANDSHAKE_COMPLETE * * Returns: the number of bytes received, @@ -289,8 +291,7 @@ size_t qcrypto_tls_session_check_pending(QCryptoTLSSession *sess); * the underlying data channel is non-blocking, then * this method may return control before the handshake * is complete. On non-blocking channels the - * qcrypto_tls_session_get_handshake_status() method - * should be used to determine whether the handshake + * return value determines whether the handshake * has completed, or is waiting to send or receive * data. In the latter cases, the caller should setup * an event loop watch and call this method again @@ -306,23 +307,6 @@ typedef enum { QCRYPTO_TLS_HANDSHAKE_RECVING, } QCryptoTLSSessionHandshakeStatus; -/** - * qcrypto_tls_session_get_handshake_status: - * @sess: the TLS session object - * - * Check the status of the TLS handshake. This - * is used with non-blocking data channels to - * determine whether the handshake is waiting - * to send or receive further data to/from the - * remote peer. - * - * Once this returns QCRYPTO_TLS_HANDSHAKE_COMPLETE - * it is permitted to send/receive payload data on - * the channel - */ -QCryptoTLSSessionHandshakeStatus -qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess); - typedef enum { QCRYPTO_TLS_BYE_COMPLETE, QCRYPTO_TLS_BYE_SENDING, diff --git a/io/channel-tls.c b/io/channel-tls.c index 517ce190a4..ecde6b57bf 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -162,16 +162,17 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, GMainContext *context) { Error *err = NULL; - QCryptoTLSSessionHandshakeStatus status; + int status; - if (qcrypto_tls_session_handshake(ioc->session, &err) < 0) { + status = qcrypto_tls_session_handshake(ioc->session, &err); + + if (status < 0) { trace_qio_channel_tls_handshake_fail(ioc); qio_task_set_error(task, err); qio_task_complete(task); return; } - status = qcrypto_tls_session_get_handshake_status(ioc->session); if (status == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { trace_qio_channel_tls_handshake_complete(ioc); if (qcrypto_tls_session_check_credentials(ioc->session, diff --git a/tests/unit/test-crypto-tlssession.c b/tests/unit/test-crypto-tlssession.c index 3395f73560..554054e934 100644 --- a/tests/unit/test-crypto-tlssession.c +++ b/tests/unit/test-crypto-tlssession.c @@ -158,8 +158,7 @@ static void test_crypto_tls_session_psk(void) rv = qcrypto_tls_session_handshake(serverSess, &error_abort); g_assert(rv >= 0); - if (qcrypto_tls_session_get_handshake_status(serverSess) == - QCRYPTO_TLS_HANDSHAKE_COMPLETE) { + if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { serverShake = true; } } @@ -167,8 +166,7 @@ static void test_crypto_tls_session_psk(void) rv = qcrypto_tls_session_handshake(clientSess, &error_abort); g_assert(rv >= 0); - if (qcrypto_tls_session_get_handshake_status(clientSess) == - QCRYPTO_TLS_HANDSHAKE_COMPLETE) { + if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { clientShake = true; } } @@ -352,8 +350,7 @@ static void test_crypto_tls_session_x509(const void *opaque) rv = qcrypto_tls_session_handshake(serverSess, &error_abort); g_assert(rv >= 0); - if (qcrypto_tls_session_get_handshake_status(serverSess) == - QCRYPTO_TLS_HANDSHAKE_COMPLETE) { + if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { serverShake = true; } } @@ -361,8 +358,7 @@ static void test_crypto_tls_session_x509(const void *opaque) rv = qcrypto_tls_session_handshake(clientSess, &error_abort); g_assert(rv >= 0); - if (qcrypto_tls_session_get_handshake_status(clientSess) == - QCRYPTO_TLS_HANDSHAKE_COMPLETE) { + if (rv == QCRYPTO_TLS_HANDSHAKE_COMPLETE) { clientShake = true; } } From a25b013019672ab456ef8b51912eadcdda418b73 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 7 Feb 2025 10:46:17 -0300 Subject: [PATCH 04/22] io: Add flags argument to qio_channel_readv_full_all_eof MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to pass flags into qio_channel_tls_readv() but qio_channel_readv_full_all_eof() doesn't take a flags argument. No functional change. Reviewed-by: Daniel P. Berrangé Acked-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas --- hw/remote/mpqemu-link.c | 2 +- include/io/channel.h | 2 ++ io/channel.c | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index e25f97680d..49885a1db6 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -110,7 +110,7 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, bql_unlock(); } - ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp); + ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, 0, errp); if (drop_bql && !iothread && !qemu_in_coroutine()) { bql_lock(); diff --git a/include/io/channel.h b/include/io/channel.h index bdf0bca92a..58940eead5 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -885,6 +885,7 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc, * @niov: the length of the @iov array * @fds: an array of file handles to read * @nfds: number of file handles in @fds + * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) * @errp: pointer to a NULL-initialized error object * * @@ -903,6 +904,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc, const struct iovec *iov, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp); /** diff --git a/io/channel.c b/io/channel.c index e3f17c24a0..ebd9322765 100644 --- a/io/channel.c +++ b/io/channel.c @@ -115,7 +115,8 @@ int coroutine_mixed_fn qio_channel_readv_all_eof(QIOChannel *ioc, size_t niov, Error **errp) { - return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp); + return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, 0, + errp); } int coroutine_mixed_fn qio_channel_readv_all(QIOChannel *ioc, @@ -130,6 +131,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc, const struct iovec *iov, size_t niov, int **fds, size_t *nfds, + int flags, Error **errp) { int ret = -1; @@ -155,7 +157,7 @@ int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc, while ((nlocal_iov > 0) || local_fds) { ssize_t len; len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, - local_nfds, 0, errp); + local_nfds, flags, errp); if (len == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { qio_channel_yield(ioc, G_IO_IN); @@ -222,7 +224,8 @@ int coroutine_mixed_fn qio_channel_readv_full_all(QIOChannel *ioc, int **fds, size_t *nfds, Error **errp) { - int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, errp); + int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, 0, + errp); if (ret == 0) { error_setg(errp, "Unexpected end-of-file before all data were read"); From 322d873b634dc515220f154e29626a33f528bbfb Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 7 Feb 2025 10:48:21 -0300 Subject: [PATCH 05/22] io: Add a read flag for relaxed EOF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a read flag that can inform a channel that it's ok to receive an EOF at any moment. Channels that have some form of strict EOF tracking, such as TLS session termination, may choose to ignore EOF errors with the use of this flag. This is being added for compatibility with older migration streams that do not include a TLS termination step. Reviewed-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas --- include/io/channel.h | 1 + io/channel-tls.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/io/channel.h b/include/io/channel.h index 58940eead5..62b657109c 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -35,6 +35,7 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 #define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 +#define QIO_CHANNEL_READ_FLAG_RELAXED_EOF 0x2 typedef enum QIOChannelFeature QIOChannelFeature; diff --git a/io/channel-tls.c b/io/channel-tls.c index ecde6b57bf..caf8301a9e 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -359,6 +359,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, tioc->session, iov[i].iov_base, iov[i].iov_len, + flags & QIO_CHANNEL_READ_FLAG_RELAXED_EOF || qatomic_load_acquire(&tioc->shutdown) & QIO_CHANNEL_SHUTDOWN_READ, errp); if (ret == QCRYPTO_TLS_SESSION_ERR_BLOCK) { From 48796f6b44df1dd0f78d18757889d5ac478c33e4 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 5 Feb 2025 13:17:22 -0300 Subject: [PATCH 06/22] migration/multifd: Terminate the TLS connection The multifd recv side has been getting a TLS error of GNUTLS_E_PREMATURE_TERMINATION at the end of migration when the send side closes the sockets without ending the TLS session. This has been masked by the code not checking the migration error after loadvm. Start ending the TLS session at multifd_send_shutdown() so the recv side always sees a clean termination (EOF) and we can start to differentiate that from an actual premature termination that might possibly happen in the middle of the migration. There's nothing to be done if a previous migration error has already broken the connection, so add a comment explaining it and ignore any errors coming from gnutls_bye(). This doesn't break compat with older recv-side QEMUs because EOF has always caused the recv thread to exit cleanly. Reviewed-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/multifd.c | 38 +++++++++++++++++++++++++++++++++++++- migration/tls.c | 5 +++++ migration/tls.h | 2 +- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index ab73d6d984..0296758c08 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -490,6 +490,36 @@ void multifd_send_shutdown(void) return; } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + /* thread_created implies the TLS handshake has succeeded */ + if (p->tls_thread_created && p->thread_created) { + Error *local_err = NULL; + /* + * The destination expects the TLS session to always be + * properly terminated. This helps to detect a premature + * termination in the middle of the stream. Note that + * older QEMUs always break the connection on the source + * and the destination always sees + * GNUTLS_E_PREMATURE_TERMINATION. + */ + migration_tls_channel_end(p->c, &local_err); + + /* + * The above can return an error in case the migration has + * already failed. If the migration succeeded, errors are + * not expected but there's no need to kill the source. + */ + if (local_err && !migration_has_failed(migrate_get_current())) { + warn_report( + "multifd_send_%d: Failed to terminate TLS connection: %s", + p->id, error_get_pretty(local_err)); + break; + } + } + } + multifd_send_terminate_threads(); for (i = 0; i < migrate_multifd_channels(); i++) { @@ -1141,7 +1171,13 @@ static void *multifd_recv_thread(void *opaque) ret = qio_channel_read_all_eof(p->c, (void *)p->packet, p->packet_len, &local_err); - if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ + if (!ret) { + /* EOF */ + assert(!local_err); + break; + } + + if (ret == -1) { break; } diff --git a/migration/tls.c b/migration/tls.c index fa03d9136c..5cbf952383 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -156,6 +156,11 @@ void migration_tls_channel_connect(MigrationState *s, NULL); } +void migration_tls_channel_end(QIOChannel *ioc, Error **errp) +{ + qio_channel_tls_bye(QIO_CHANNEL_TLS(ioc), errp); +} + bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) { if (!migrate_tls()) { diff --git a/migration/tls.h b/migration/tls.h index 5797d153cb..58b25e1228 100644 --- a/migration/tls.h +++ b/migration/tls.h @@ -36,7 +36,7 @@ void migration_tls_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname, Error **errp); - +void migration_tls_channel_end(QIOChannel *ioc, Error **errp); /* Whether the QIO channel requires further TLS handshake? */ bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc); From 9b3b192f65b1cf635719a2981dd2d4b70892d2ec Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 7 Feb 2025 10:50:49 -0300 Subject: [PATCH 07/22] migration/multifd: Add a compat property for TLS termination We're currently changing the way the source multifd migration handles the shutdown of the multifd channels when TLS is in use to perform a clean termination by calling gnutls_bye(). Older src QEMUs will always close the channel without terminating the TLS session. New dst QEMUs treat an unclean termination as an error. Add multifd_clean_tls_termination (default true) that can be switched on the destination whenever a src QEMU <= 9.2 is in use. (Note that the compat property is only strictly necessary for src QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs 9.1 and 9.2 can put the destination in a condition where it doesn't see the unclean termination. Still, make the property more inclusive to facilitate potential backports.) Reviewed-by: Peter Xu Signed-off-by: Fabiano Rosas --- hw/core/machine.c | 1 + migration/migration.h | 33 +++++++++++++++++++++++++++++++++ migration/multifd.c | 14 ++++++++++++-- migration/multifd.h | 2 ++ migration/options.c | 2 ++ 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 254cc20c4c..02cff735b3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = { { "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" }, { "virtio-mem-pci", "vectors", "0" }, + { "migration", "multifd-clean-tls-termination", "false" }, }; const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); diff --git a/migration/migration.h b/migration/migration.h index eaebcc2042..eb84f75b4a 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -443,6 +443,39 @@ struct MigrationState { * Default value is false. (since 8.1) */ bool multifd_flush_after_each_section; + + /* + * This variable only makes sense when set on the machine that is + * the destination of a multifd migration with TLS enabled. It + * affects the behavior of the last send->recv iteration with + * regards to termination of the TLS session. + * + * When set: + * + * - the destination QEMU instance can expect to never get a + * GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error + * message: "The TLS connection was non-properly terminated". + * + * When clear: + * + * - the destination QEMU instance can expect to see a + * GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel + * whenever the last recv() call of that channel happens after + * the source QEMU instance has already issued shutdown() on the + * channel. + * + * Commit 637280aeb2 (since 9.1) introduced a side effect that + * causes the destination instance to not be affected by the + * premature termination, while commit 1d457daf86 (since 10.0) + * causes the premature termination condition to be once again + * reachable. + * + * NOTE: Regardless of the state of this option, a premature + * termination of the TLS connection might happen due to error at + * any moment prior to the last send->recv iteration. + */ + bool multifd_clean_tls_termination; + /* * This decides the size of guest memory chunk that will be used * to track dirty bitmap clearing. The size of memory chunk will diff --git a/migration/multifd.c b/migration/multifd.c index 0296758c08..554035e095 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void) static void *multifd_recv_thread(void *opaque) { + MigrationState *s = migrate_get_current(); MultiFDRecvParams *p = opaque; Error *local_err = NULL; bool use_packets = multifd_use_packets(); @@ -1159,18 +1160,27 @@ static void *multifd_recv_thread(void *opaque) trace_multifd_recv_thread_start(p->id); rcu_register_thread(); + if (!s->multifd_clean_tls_termination) { + p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF; + } + while (true) { uint32_t flags = 0; bool has_data = false; p->normal_num = 0; if (use_packets) { + struct iovec iov = { + .iov_base = (void *)p->packet, + .iov_len = p->packet_len + }; + if (multifd_recv_should_exit()) { break; } - ret = qio_channel_read_all_eof(p->c, (void *)p->packet, - p->packet_len, &local_err); + ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL, + p->read_flags, &local_err); if (!ret) { /* EOF */ assert(!local_err); diff --git a/migration/multifd.h b/migration/multifd.h index bd785b9873..cf408ff721 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -244,6 +244,8 @@ typedef struct { uint32_t zero_num; /* used for de-compression methods */ void *compress_data; + /* Flags for the QIOChannel */ + int read_flags; } MultiFDRecvParams; typedef struct { diff --git a/migration/options.c b/migration/options.c index 4db340b502..bb259d192a 100644 --- a/migration/options.c +++ b/migration/options.c @@ -99,6 +99,8 @@ const Property migration_properties[] = { clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, preempt_pre_7_2, false), + DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState, + multifd_clean_tls_termination, true), /* Migration parameters */ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, From e0ad300fe1a0e0b12b90994bab6e4df77dd1ee8a Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 5 Feb 2025 13:23:55 -0300 Subject: [PATCH 08/22] migration: Check migration error after loadvm We're currently only checking the QEMUFile error after qemu_loadvm_state(). This was causing a TLS termination error from multifd recv threads to be ignored. Start checking the migration error as well to avoid missing further errors. Regarding compatibility concerning the TLS termination error that was being ignored, for QEMUs <= 9.2 - if the old QEMU is being used as migration source - the recently added migration property multifd-tls-clean-termination needs to be set to OFF in the *destination* machine. Reviewed-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/savevm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/migration/savevm.c b/migration/savevm.c index bc375db282..4046faf009 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2940,7 +2940,11 @@ int qemu_loadvm_state(QEMUFile *f) /* When reaching here, it must be precopy */ if (ret == 0) { - ret = qemu_file_get_error(f); + if (migrate_has_error(migrate_get_current())) { + ret = -EINVAL; + } else { + ret = qemu_file_get_error(f); + } } /* From a47f0cfba8d33f7001bcc4616f96e42dbd553135 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:19 -0300 Subject: [PATCH 09/22] migration: Set migration error outside of migrate_cancel There's no point passing the error into migration cancel only for it to call migrate_set_error(). Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-2-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/migration.c | 7 ++----- migration/migration.h | 2 +- migration/ram.c | 4 +++- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 396928513a..7728f52aef 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -342,11 +342,8 @@ void migration_bh_schedule(QEMUBHFunc *cb, void *opaque) qemu_bh_schedule(bh); } -void migration_cancel(const Error *error) +void migration_cancel() { - if (error) { - migrate_set_error(current_migration, error); - } if (migrate_dirty_limit()) { qmp_cancel_vcpu_dirty_limit(false, -1, NULL); } @@ -365,7 +362,7 @@ void migration_shutdown(void) * Cancel the current migration - that will (eventually) * stop the migration using this structure */ - migration_cancel(NULL); + migration_cancel(); object_unref(OBJECT(current_migration)); /* diff --git a/migration/migration.h b/migration/migration.h index eb84f75b4a..f083f4f87e 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -563,7 +563,7 @@ void migration_make_urgent_request(void); void migration_consume_urgent_request(void); bool migration_rate_limit(void); void migration_bh_schedule(QEMUBHFunc *cb, void *opaque); -void migration_cancel(const Error *error); +void migration_cancel(void); void migration_populate_vfio_info(MigrationInfo *info); void migration_reset_vfio_bytes_transferred(void); diff --git a/migration/ram.c b/migration/ram.c index 6f460fd22d..589b6505eb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4465,8 +4465,10 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, * Abort and indicate a proper reason. */ error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr); - migration_cancel(err); + migrate_set_error(migrate_get_current(), err); error_free(err); + + migration_cancel(); } switch (ps) { From 8444d0938112b7da8d88cc6a7481b9eb33654997 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:20 -0300 Subject: [PATCH 10/22] migration: Unify migration_cancel and migrate_fd_cancel There's no need for two separate functions and this _fd_ is a historic artifact that makes little sense nowadays. Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-3-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/migration.c | 18 +++++++----------- migration/trace-events | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 7728f52aef..e37842fdd2 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -106,7 +106,6 @@ static GSList *migration_blockers[MIG_MODE__MAX]; static bool migration_object_check(MigrationState *ms, Error **errp); static bool migration_switchover_start(MigrationState *s, Error **errp); -static void migrate_fd_cancel(MigrationState *s); static bool close_return_path_on_source(MigrationState *s); static void migration_completion_end(MigrationState *s); static void migrate_hup_delete(MigrationState *s); @@ -342,14 +341,6 @@ void migration_bh_schedule(QEMUBHFunc *cb, void *opaque) qemu_bh_schedule(bh); } -void migration_cancel() -{ - if (migrate_dirty_limit()) { - qmp_cancel_vcpu_dirty_limit(false, -1, NULL); - } - migrate_fd_cancel(current_migration); -} - void migration_shutdown(void) { /* @@ -1555,12 +1546,17 @@ static void migrate_fd_error(MigrationState *s, const Error *error) migrate_set_error(s, error); } -static void migrate_fd_cancel(MigrationState *s) +void migration_cancel(void) { + MigrationState *s = migrate_get_current(); int old_state ; bool setup = (s->state == MIGRATION_STATUS_SETUP); - trace_migrate_fd_cancel(); + trace_migration_cancel(); + + if (migrate_dirty_limit()) { + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); + } WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { if (s->rp_state.from_dst_file) { diff --git a/migration/trace-events b/migration/trace-events index 12b262f8ee..d22600abe6 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -156,7 +156,7 @@ multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostnam migrate_set_state(const char *new_state) "new state %s" migrate_fd_cleanup(void) "" migrate_error(const char *error_desc) "error=%s" -migrate_fd_cancel(void) "" +migration_cancel(void) "" migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx" migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")" migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")" From 4bbadfc55e6ec608df75911b4360e6e995daa28c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:21 -0300 Subject: [PATCH 11/22] migration: Change migrate_fd_ to migration_ Remove all instances of _fd_ from the migration generic code. These functions have grown over time and the _fd_ part is now just confusing. migration_fd_error() -> migration_error() makes it a little vague. Since it's only used for migration_connect() failures, change it to migration_connect_set_error(). Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-4-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/channel.c | 4 ++-- migration/migration.c | 30 +++++++++++++++--------------- migration/migration.h | 2 +- migration/multifd.c | 2 +- migration/rdma.c | 2 +- migration/trace-events | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/migration/channel.c b/migration/channel.c index f9de064f3b..24a91ef911 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -74,7 +74,7 @@ void migration_channel_connect(MigrationState *s, if (!error) { /* tls_channel_connect will call back to this * function after the TLS handshake, - * so we mustn't call migrate_fd_connect until then + * so we mustn't call migration_connect until then */ return; @@ -89,7 +89,7 @@ void migration_channel_connect(MigrationState *s, qemu_mutex_unlock(&s->qemu_file_lock); } } - migrate_fd_connect(s, error); + migration_connect(s, error); error_free(error); } diff --git a/migration/migration.c b/migration/migration.c index e37842fdd2..c39cedef3b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1423,12 +1423,12 @@ static void migration_cleanup_json_writer(MigrationState *s) g_clear_pointer(&s->vmdesc, json_writer_free); } -static void migrate_fd_cleanup(MigrationState *s) +static void migration_cleanup(MigrationState *s) { MigrationEventType type; QEMUFile *tmp = NULL; - trace_migrate_fd_cleanup(); + trace_migration_cleanup(); migration_cleanup_json_writer(s); @@ -1485,9 +1485,9 @@ static void migrate_fd_cleanup(MigrationState *s) yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -static void migrate_fd_cleanup_bh(void *opaque) +static void migration_cleanup_bh(void *opaque) { - migrate_fd_cleanup(opaque); + migration_cleanup(opaque); } void migrate_set_error(MigrationState *s, const Error *error) @@ -1517,7 +1517,7 @@ static void migrate_error_free(MigrationState *s) } } -static void migrate_fd_error(MigrationState *s, const Error *error) +static void migration_connect_set_error(MigrationState *s, const Error *error) { MigrationStatus current = s->state; MigrationStatus next; @@ -2198,7 +2198,7 @@ void qmp_migrate(const char *uri, bool has_channels, out: if (local_err) { - migrate_fd_error(s, local_err); + migration_connect_set_error(s, local_err); error_propagate(errp, local_err); } } @@ -2243,7 +2243,7 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, if (!resume_requested) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); } - migrate_fd_error(s, local_err); + migration_connect_set_error(s, local_err); error_propagate(errp, local_err); return; } @@ -3427,7 +3427,7 @@ static void migration_iteration_finish(MigrationState *s) break; } - migration_bh_schedule(migrate_fd_cleanup_bh, s); + migration_bh_schedule(migration_cleanup_bh, s); bql_unlock(); } @@ -3455,7 +3455,7 @@ static void bg_migration_iteration_finish(MigrationState *s) break; } - migration_bh_schedule(migrate_fd_cleanup_bh, s); + migration_bh_schedule(migration_cleanup_bh, s); bql_unlock(); } @@ -3837,7 +3837,7 @@ fail_setup: return NULL; } -void migrate_fd_connect(MigrationState *s, Error *error_in) +void migration_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; uint64_t rate_limit; @@ -3847,24 +3847,24 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) /* * If there's a previous error, free it and prepare for another one. * Meanwhile if migration completes successfully, there won't have an error - * dumped when calling migrate_fd_cleanup(). + * dumped when calling migration_cleanup(). */ migrate_error_free(s); s->expected_downtime = migrate_downtime_limit(); if (error_in) { - migrate_fd_error(s, error_in); + migration_connect_set_error(s, error_in); if (resume) { /* * Don't do cleanup for resume if channel is invalid, but only dump * the error. We wait for another channel connect from the user. * The error_report still gives HMP user a hint on what failed. - * It's normally done in migrate_fd_cleanup(), but call it here + * It's normally done in migration_cleanup(), but call it here * explicitly. */ error_report_err(error_copy(s->error)); } else { - migrate_fd_cleanup(s); + migration_cleanup(s); } return; } @@ -3944,7 +3944,7 @@ fail: migrate_set_error(s, local_err); migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); error_report_err(local_err); - migrate_fd_cleanup(s); + migration_cleanup(s); } static void migration_class_init(ObjectClass *klass, void *data) diff --git a/migration/migration.h b/migration/migration.h index f083f4f87e..4639e2a7e4 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -517,7 +517,7 @@ bool migration_has_all_channels(void); void migrate_set_error(MigrationState *s, const Error *error); bool migrate_has_error(MigrationState *s); -void migrate_fd_connect(MigrationState *s, Error *error_in); +void migration_connect(MigrationState *s, Error *error_in); int migration_call_notifiers(MigrationState *s, MigrationEventType type, Error **errp); diff --git a/migration/multifd.c b/migration/multifd.c index 554035e095..215ad0414a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -444,7 +444,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) * channels have no I/O handler callback registered when reaching * here, because migration thread will wait for all multifd channel * establishments to complete during setup. Since - * migrate_fd_cleanup() will be scheduled in main thread too, all + * migration_cleanup() will be scheduled in main thread too, all * previous callbacks should guarantee to be completed when * reaching here. See multifd_send_state.channels_created and its * usage. In the future, we could replace this with an assert diff --git a/migration/rdma.c b/migration/rdma.c index 855753c671..76fb034923 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4174,7 +4174,7 @@ void rdma_start_outgoing_migration(void *opaque, s->to_dst_file = rdma_new_output(rdma); s->rdma_migration = true; - migrate_fd_connect(s, NULL); + migration_connect(s, NULL); return; return_path_err: qemu_rdma_cleanup(rdma); diff --git a/migration/trace-events b/migration/trace-events index d22600abe6..58c0f07f5b 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -154,7 +154,7 @@ multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostnam # migration.c migrate_set_state(const char *new_state) "new state %s" -migrate_fd_cleanup(void) "" +migration_cleanup(void) "" migrate_error(const char *error_desc) "error=%s" migration_cancel(void) "" migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx" From 2b667a8c0f7ad423c9141b3a487898c50a6ff5e0 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:22 -0300 Subject: [PATCH 12/22] migration: Fix hang after error in destination setup phase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the destination side fails at migration_ioc_process_incoming() before starting the coroutine, it will report the error but QEMU will not exit. Set the migration state to FAILED and exit the process if exit-on-error allows. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2633 Reported-by: Daniel P. Berrangé Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-5-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/channel.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/migration/channel.c b/migration/channel.c index 24a91ef911..a547b1fbfe 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -33,6 +33,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) { MigrationState *s = migrate_get_current(); + MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; trace_migration_set_incoming_channel( @@ -47,6 +48,10 @@ void migration_channel_process_incoming(QIOChannel *ioc) if (local_err) { error_report_err(local_err); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (mis->exit_on_error) { + exit(EXIT_FAILURE); + } } } From 646119088f8a1d9925239e70b0a7b426bfb6e58a Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:23 -0300 Subject: [PATCH 13/22] migration: Reject qmp_migrate_cancel after postcopy After postcopy has started, it's not possible to recover the source machine in case a migration error occurs because the destination has already been changing the state of the machine. For that same reason, it doesn't make sense to try to cancel the migration after postcopy has started. Reject the cancel command during postcopy. Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-6-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/migration.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index c39cedef3b..48c9ad3c96 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2251,7 +2251,18 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, void qmp_migrate_cancel(Error **errp) { - migration_cancel(NULL); + /* + * After postcopy migration has started, the source machine is not + * recoverable in case of a migration error. This also means the + * cancel command cannot be used as cancel should allow the + * machine to continue operation. + */ + if (migration_in_postcopy()) { + error_setg(errp, "Postcopy migration in progress, cannot cancel."); + return; + } + + migration_cancel(); } void qmp_migrate_continue(MigrationStatus state, Error **errp) From 4a228bcc994ea8ab05d4927e23e7916f32cc1168 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:24 -0300 Subject: [PATCH 14/22] migration: Don't set FAILED state when cancelling The expected outcome from qmp_migrate_cancel() is that the source migration goes to the terminal state MIGRATION_STATUS_CANCELLED. Anything different from this is a bug when cancelling. Make sure there is never a state transition from an unspecified state into FAILED. Code that sets FAILED, should always either make sure that the old state is not CANCELLING or specify the old state. Note that the destination is allowed to go into FAILED, so there's no issue there. (I don't think this is relevant as a backport because cancelling does work, it just doesn't show the right state at the end) Fixes: 3dde8fdbad ("migration: Merge precopy/postcopy on switchover start") Fixes: d0edb8a173 ("migration: Create the postcopy preempt channel asynchronously") Fixes: 8518278a6a ("migration: implementation of background snapshot thread") Fixes: bf78a046b9 ("migration: refactor migrate_fd_connect failures") Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-7-farosas@suse.de> Signed-off-by: Fabiano Rosas --- migration/migration.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 48c9ad3c96..c597aa707e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2648,7 +2648,10 @@ static int postcopy_start(MigrationState *ms, Error **errp) if (migrate_postcopy_preempt()) { migration_wait_main_channel(ms); if (postcopy_preempt_establish_channel(ms)) { - migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); + if (ms->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&ms->state, ms->state, + MIGRATION_STATUS_FAILED); + } error_setg(errp, "%s: Failed to establish preempt channel", __func__); return -1; @@ -2986,7 +2989,9 @@ fail: error_free(local_err); } - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (s->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + } } /** @@ -3009,7 +3014,7 @@ static void bg_migration_completion(MigrationState *s) qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); qemu_fflush(s->to_dst_file); } else if (s->state == MIGRATION_STATUS_CANCELLING) { - goto fail; + return; } if (qemu_file_get_error(s->to_dst_file)) { @@ -3953,7 +3958,9 @@ void migration_connect(MigrationState *s, Error *error_in) fail: migrate_set_error(s, local_err); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (s->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + } error_report_err(local_err); migration_cleanup(s); } From aabb2a5b5d54acde0992d933b647a306e59362b4 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:25 -0300 Subject: [PATCH 15/22] tests/qtest/migration: Introduce migration_test_add_suffix Introduce a new migration_test_add_suffix to allow programmatic creation of tests based on a suffix. Pass the test name into the test so it can know which variant to run. Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-8-farosas@suse.de> Signed-off-by: Fabiano Rosas --- tests/qtest/migration/migration-util.c | 24 ++++++++++++++++++++++++ tests/qtest/migration/migration-util.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/tests/qtest/migration/migration-util.c b/tests/qtest/migration/migration-util.c index 6261d80e4a..642cf50c8d 100644 --- a/tests/qtest/migration/migration-util.c +++ b/tests/qtest/migration/migration-util.c @@ -236,6 +236,7 @@ char *resolve_machine_version(const char *alias, const char *var1, typedef struct { char *name; void (*func)(void); + void (*func_full)(void *); } MigrationTest; static void migration_test_destroy(gpointer data) @@ -265,6 +266,29 @@ void migration_test_add(const char *path, void (*fn)(void)) migration_test_destroy); } +static void migration_test_wrapper_full(const void *data) +{ + MigrationTest *test = (MigrationTest *)data; + + g_test_message("Running /%s%s", qtest_get_arch(), test->name); + test->func_full(test->name); +} + +void migration_test_add_suffix(const char *path, const char *suffix, + void (*fn)(void *)) +{ + MigrationTest *test = g_new0(MigrationTest, 1); + + g_assert(g_str_has_suffix(path, "/")); + g_assert(!g_str_has_prefix(suffix, "/")); + + test->func_full = fn; + test->name = g_strconcat(path, suffix, NULL); + + qtest_add_data_func_full(test->name, test, migration_test_wrapper_full, + migration_test_destroy); +} + #ifdef O_DIRECT /* * Probe for O_DIRECT support on the filesystem. Since this is used diff --git a/tests/qtest/migration/migration-util.h b/tests/qtest/migration/migration-util.h index f5f2e4650e..44815e9c42 100644 --- a/tests/qtest/migration/migration-util.h +++ b/tests/qtest/migration/migration-util.h @@ -51,6 +51,8 @@ static inline bool probe_o_direct_support(const char *tmpfs) bool ufd_version_check(bool *uffd_feature_thread_id); bool kvm_dirty_ring_supported(void); void migration_test_add(const char *path, void (*fn)(void)); +void migration_test_add_suffix(const char *path, const char *suffix, + void (*fn)(void *)); char *migrate_get_connect_uri(QTestState *who); void migrate_set_ports(QTestState *to, QList *channel_list); From 538e03d28001a325a93154df2313d215721b2241 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:26 -0300 Subject: [PATCH 16/22] tests/qtest/migration: Add a cancel test The qmp_migrate_cancel() command is poorly tested and code inspection reveals that there might be concurrency issues with its usage. Add a test that runs a migration and calls qmp_migrate_cancel() at specific moments. In order to make the test more deterministic, instead of calling qmp_migrate_cancel() at random moments during migration, do it after the migration status change events are seen. The expected result is that qmp_migrate_cancel() on the source ends migration on the source with the "cancelled" state and ends migration on the destination with the "failed" state. The only exception is that a failed migration should continue in the failed state. Cancelling is not allowed during postcopy (no test is added for this because it's a trivial check in the code). Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-9-farosas@suse.de> Signed-off-by: Fabiano Rosas --- tests/qtest/migration/precopy-tests.c | 176 ++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c index 162fa69531..ba273d10b9 100644 --- a/tests/qtest/migration/precopy-tests.c +++ b/tests/qtest/migration/precopy-tests.c @@ -20,6 +20,7 @@ #include "migration/migration-util.h" #include "ppc-util.h" #include "qobject/qlist.h" +#include "qapi-types-migration.h" #include "qemu/module.h" #include "qemu/option.h" #include "qemu/range.h" @@ -536,6 +537,161 @@ static void test_multifd_tcp_cancel(void) migrate_end(from, to2, true); } +static void test_cancel_src_after_failed(QTestState *from, QTestState *to, + const char *uri, const char *phase) +{ + /* + * No migrate_incoming_qmp() at the start to force source into + * failed state during migrate_qmp(). + */ + + wait_for_serial("src_serial"); + migrate_ensure_converge(from); + + migrate_qmp(from, to, uri, NULL, "{}"); + + migration_event_wait(from, phase); + migrate_cancel(from); + + /* cancelling will not move the migration out of 'failed' */ + + wait_for_migration_status(from, "failed", + (const char * []) { "completed", NULL }); + + /* + * Not waiting for the destination because it never started + * migration. + */ +} + +static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, + const char *uri, const char *phase) +{ + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); + + wait_for_serial("src_serial"); + migrate_ensure_converge(from); + + migrate_qmp(from, to, uri, NULL, "{}"); + + /* To move to cancelled/cancelling */ + migrate_cancel(from); + migration_event_wait(from, phase); + + /* The migrate_cancel under test */ + migrate_cancel(from); + + wait_for_migration_status(from, "cancelled", + (const char * []) { "completed", NULL }); + + wait_for_migration_status(to, "failed", + (const char * []) { "completed", NULL }); +} + +static void test_cancel_src_after_complete(QTestState *from, QTestState *to, + const char *uri, const char *phase) +{ + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); + + wait_for_serial("src_serial"); + migrate_ensure_converge(from); + + migrate_qmp(from, to, uri, NULL, "{}"); + + migration_event_wait(from, phase); + migrate_cancel(from); + + /* + * qmp_migrate_cancel() exits early if migration is not running + * anymore, the status will not change to cancelled. + */ + wait_for_migration_complete(from); + wait_for_migration_complete(to); +} + +static void test_cancel_src_after_none(QTestState *from, QTestState *to, + const char *uri, const char *phase) +{ + /* + * Test that cancelling without a migration happening does not + * affect subsequent migrations + */ + migrate_cancel(to); + + wait_for_serial("src_serial"); + migrate_cancel(from); + + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); + + migrate_ensure_converge(from); + migrate_qmp(from, to, uri, NULL, "{}"); + + wait_for_migration_complete(from); + wait_for_migration_complete(to); +} + +static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, + const char *uri, const char *phase) +{ + migrate_set_capability(from, "pause-before-switchover", true); + migrate_set_capability(to, "pause-before-switchover", true); + + migrate_set_capability(from, "multifd", true); + migrate_set_capability(to, "multifd", true); + + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); + + wait_for_serial("src_serial"); + migrate_ensure_converge(from); + + migrate_qmp(from, to, uri, NULL, "{}"); + + migration_event_wait(from, phase); + migrate_cancel(from); + migration_event_wait(from, "cancelling"); + + wait_for_migration_status(from, "cancelled", + (const char * []) { "completed", NULL }); + + wait_for_migration_status(to, "failed", + (const char * []) { "completed", NULL }); +} + +static void test_cancel_src_after_status(void *opaque) +{ + const char *test_path = opaque; + g_autofree char *phase = g_path_get_basename(test_path); + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + MigrateStart args = { + .hide_stderr = true, + }; + + if (migrate_start(&from, &to, "defer", &args)) { + return; + } + + if (g_str_equal(phase, "cancelling") || + g_str_equal(phase, "cancelled")) { + test_cancel_src_after_cancelled(from, to, uri, phase); + + } else if (g_str_equal(phase, "completed")) { + test_cancel_src_after_complete(from, to, uri, phase); + + } else if (g_str_equal(phase, "failed")) { + test_cancel_src_after_failed(from, to, uri, phase); + + } else if (g_str_equal(phase, "none")) { + test_cancel_src_after_none(from, to, uri, phase); + + } else { + /* any state that comes before pre-switchover */ + test_cancel_src_pre_switchover(from, to, uri, phase); + } + + migrate_end(from, to, false); +} + static void calc_dirty_rate(QTestState *who, uint64_t calc_time) { qtest_qmp_assert_success(who, @@ -1018,4 +1174,24 @@ void migration_test_add_precopy(MigrationTestEnv *env) test_vcpu_dirty_limit); } } + + /* ensure new status don't go unnoticed */ + assert(MIGRATION_STATUS__MAX == 15); + + for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { + switch (i) { + case MIGRATION_STATUS_DEVICE: /* happens too fast */ + case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ + case MIGRATION_STATUS_COLO: /* no support in tests */ + case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ + case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: + case MIGRATION_STATUS_POSTCOPY_RECOVER: + continue; + default: + migration_test_add_suffix("/migration/cancel/src/after/", + MigrationStatus_str(i), + test_cancel_src_after_status); + } + } } From 24f4c80cfc31ab4ed4cb6553c9289e3cf8ca63ac Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 13 Feb 2025 14:59:27 -0300 Subject: [PATCH 17/22] migration: Update migrate_cancel documentation Update the migrate_cancel command documentation with a few words about postcopy and the expected state of the machine after migration. Acked-by: Markus Armbruster Reviewed-by: Peter Xu Message-ID: <20250213175927.19642-10-farosas@suse.de> Signed-off-by: Fabiano Rosas --- qapi/migration.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qapi/migration.json b/qapi/migration.json index 43babd1df4..8b9c53595c 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1524,7 +1524,9 @@ ## # @migrate_cancel: # -# Cancel the current executing migration process. +# Cancel the currently executing migration process. Allows a new +# migration to be started right after. When postcopy-ram is in use, +# cancelling is not allowed after the postcopy phase has started. # # .. note:: This command succeeds even if there is no migration # process running. From b451705e3b90e55c6070338fa97aaae274721a5c Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 5 Feb 2025 12:54:01 -0800 Subject: [PATCH 18/22] migration: use parameters.mode in cpr_state_save qmp_migrate guarantees that cpr_channel is not null for MIG_MODE_CPR_TRANSFER when cpr_state_save is called: qmp_migrate() if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) { return; } cpr_state_save(cpr_channel) but cpr_state_save checks for mode differently before using channel, and Coverity cannot infer that they are equivalent in outgoing QEMU, and warns that channel may be NULL: cpr_state_save(channel) MigMode mode = migrate_mode(); if (mode == MIG_MODE_CPR_TRANSFER) { f = cpr_transfer_output(channel, errp); To make Coverity happy, assert that channel != NULL in cpr_state_save. Resolves: Coverity CID 1590980 Reported-by: Peter Maydell Signed-off-by: Steve Sistare Message-ID: <1738788841-211843-1-git-send-email-steven.sistare@oracle.com> [assert instead of using parameters.mode in cpr_state_save] Signed-off-by: Fabiano Rosas --- migration/cpr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/cpr.c b/migration/cpr.c index 584b0b98f7..180faab247 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -137,6 +137,7 @@ int cpr_state_save(MigrationChannel *channel, Error **errp) trace_cpr_state_save(MigMode_str(mode)); if (mode == MIG_MODE_CPR_TRANSFER) { + g_assert(channel); f = cpr_transfer_output(channel, errp); } else { return 0; From 32a1bb21c6f4d569427099e4e495f1d07d017fdb Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Fri, 14 Feb 2025 18:55:23 +0800 Subject: [PATCH 19/22] guestperf: Support deferred migration for multifd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way to enable multifd migration has been changed by commit, 82137e6c8c (migration: enforce multifd and postcopy preempt to be set before incoming), and guestperf has not made the necessary changes. If multifd migration had been enabled in the previous manner, the following error would have occurred: Multifd must be set before incoming starts Supporting deferred migration will fix it. Signed-off-by: Hyman Huang Reviewed-by: Daniel P. Berrangé Message-ID: <8874e170f890ce0bc6f25cb0d9b9ae307ce2e070.1739530098.git.yong.huang@smartx.com> Signed-off-by: Fabiano Rosas --- tests/migration-stress/guestperf/engine.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/migration-stress/guestperf/engine.py b/tests/migration-stress/guestperf/engine.py index 608d7270f6..4b15322e8d 100644 --- a/tests/migration-stress/guestperf/engine.py +++ b/tests/migration-stress/guestperf/engine.py @@ -106,7 +106,8 @@ class Engine(object): info.get("dirty-limit-ring-full-time", 0), ) - def _migrate(self, hardware, scenario, src, dst, connect_uri): + def _migrate(self, hardware, scenario, src, + dst, connect_uri, defer_migrate): src_qemu_time = [] src_vcpu_time = [] src_pid = src.get_pid() @@ -220,6 +221,8 @@ class Engine(object): resp = src.cmd("migrate-set-parameters", vcpu_dirty_limit=scenario._vcpu_dirty_limit) + if defer_migrate: + resp = dst.cmd("migrate-incoming", uri=connect_uri) resp = src.cmd("migrate", uri=connect_uri) post_copy = False @@ -373,11 +376,14 @@ class Engine(object): def _get_src_args(self, hardware): return self._get_common_args(hardware) - def _get_dst_args(self, hardware, uri): + def _get_dst_args(self, hardware, uri, defer_migrate): tunnelled = False if self._dst_host != "localhost": tunnelled = True argv = self._get_common_args(hardware, tunnelled) + + if defer_migrate: + return argv + ["-incoming", "defer"] return argv + ["-incoming", uri] @staticmethod @@ -424,6 +430,7 @@ class Engine(object): def run(self, hardware, scenario, result_dir=os.getcwd()): abs_result_dir = os.path.join(result_dir, scenario._name) + defer_migrate = False if self._transport == "tcp": uri = "tcp:%s:9000" % self._dst_host @@ -439,6 +446,9 @@ class Engine(object): except: pass + if scenario._multifd: + defer_migrate = True + if self._dst_host != "localhost": dstmonaddr = ("localhost", 9001) else: @@ -452,7 +462,7 @@ class Engine(object): monitor_address=srcmonaddr) dst = QEMUMachine(self._binary, - args=self._get_dst_args(hardware, uri), + args=self._get_dst_args(hardware, uri, defer_migrate), wrapper=self._get_dst_wrapper(hardware), name="qemu-dst-%d" % os.getpid(), monitor_address=dstmonaddr) @@ -461,7 +471,8 @@ class Engine(object): src.launch() dst.launch() - ret = self._migrate(hardware, scenario, src, dst, uri) + ret = self._migrate(hardware, scenario, src, + dst, uri, defer_migrate) progress_history = ret[0] qemu_timings = ret[1] vcpu_timings = ret[2] From 42f5975cd84eed96661468ae0b895eed0e16074b Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Fri, 14 Feb 2025 18:55:24 +0800 Subject: [PATCH 20/22] guestperf: Nitpick the inconsistent parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Hyman Huang Reviewed-by: Fabiano Rosas Reviewed-by: Daniel P. Berrangé Message-ID: Signed-off-by: Fabiano Rosas --- tests/migration-stress/guestperf/comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/migration-stress/guestperf/comparison.py b/tests/migration-stress/guestperf/comparison.py index 42cc0372d1..40e9d2eb1d 100644 --- a/tests/migration-stress/guestperf/comparison.py +++ b/tests/migration-stress/guestperf/comparison.py @@ -127,7 +127,7 @@ COMPARISONS = [ # varying numbers of channels Comparison("compr-multifd", scenarios = [ Scenario("compr-multifd-channels-4", - multifd=True, multifd_channels=2), + multifd=True, multifd_channels=4), Scenario("compr-multifd-channels-8", multifd=True, multifd_channels=8), Scenario("compr-multifd-channels-32", From 45f34156e4d9c3f4215402b34d7da32f00073066 Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Fri, 14 Feb 2025 18:55:25 +0800 Subject: [PATCH 21/22] guestperf: Introduce multifd compression option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guestperf tool does not cover the multifd compression option currently, it is worth supporting so that developers can analysis the migration performance with different compression algorithms. Multifd support 4 compression algorithms currently: zlib, zstd, qpl, uadk To request that multifd with the specified compression algorithm such as zlib: $ ./tests/migration-stress/guestperf.py \ --multifd --multifd-channels 4 --multifd-compression zlib \ --output output.json To run the entire standardized set of multifd compression comparisons, with unix migration: $ ./tests/migration-stress/guestperf-batch.py \ --dst-host localhost --transport unix \ --filter compr-multifd-compression* --output outputdir Signed-off-by: Hyman Huang Reviewed-by: Daniel P. Berrangé Message-ID: Signed-off-by: Fabiano Rosas --- tests/migration-stress/guestperf/comparison.py | 13 +++++++++++++ tests/migration-stress/guestperf/engine.py | 14 ++++++++++++++ tests/migration-stress/guestperf/scenario.py | 7 +++++-- tests/migration-stress/guestperf/shell.py | 3 +++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/migration-stress/guestperf/comparison.py b/tests/migration-stress/guestperf/comparison.py index 40e9d2eb1d..dee3ac25e4 100644 --- a/tests/migration-stress/guestperf/comparison.py +++ b/tests/migration-stress/guestperf/comparison.py @@ -158,4 +158,17 @@ COMPARISONS = [ Scenario("compr-dirty-limit-50MB", dirty_limit=True, vcpu_dirty_limit=50), ]), + + # Looking at effect of multifd with + # different compression algorithms + Comparison("compr-multifd-compression", scenarios = [ + Scenario("compr-multifd-compression-zlib", + multifd=True, multifd_channels=2, multifd_compression="zlib"), + Scenario("compr-multifd-compression-zstd", + multifd=True, multifd_channels=2, multifd_compression="zstd"), + Scenario("compr-multifd-compression-qpl", + multifd=True, multifd_channels=2, multifd_compression="qpl"), + Scenario("compr-multifd-compression-uadk", + multifd=True, multifd_channels=2, multifd_compression="uadk"), + ]), ] diff --git a/tests/migration-stress/guestperf/engine.py b/tests/migration-stress/guestperf/engine.py index 4b15322e8d..e11f6a8496 100644 --- a/tests/migration-stress/guestperf/engine.py +++ b/tests/migration-stress/guestperf/engine.py @@ -31,6 +31,8 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'python')) from qemu.machine import QEMUMachine +# multifd supported compression algorithms +MULTIFD_CMP_ALGS = ("zlib", "zstd", "qpl", "uadk") class Engine(object): @@ -191,6 +193,12 @@ class Engine(object): scenario._compression_xbzrle_cache)) if scenario._multifd: + if (scenario._multifd_compression and + (scenario._multifd_compression not in MULTIFD_CMP_ALGS)): + raise Exception("unsupported multifd compression " + "algorithm: %s" % + scenario._multifd_compression) + resp = src.cmd("migrate-set-capabilities", capabilities = [ { "capability": "multifd", @@ -206,6 +214,12 @@ class Engine(object): resp = dst.cmd("migrate-set-parameters", multifd_channels=scenario._multifd_channels) + if scenario._multifd_compression: + resp = src.cmd("migrate-set-parameters", + multifd_compression=scenario._multifd_compression) + resp = dst.cmd("migrate-set-parameters", + multifd_compression=scenario._multifd_compression) + if scenario._dirty_limit: if not hardware._dirty_ring_size: raise Exception("dirty ring size must be configured when " diff --git a/tests/migration-stress/guestperf/scenario.py b/tests/migration-stress/guestperf/scenario.py index 154c4f5d5f..4be7fafebf 100644 --- a/tests/migration-stress/guestperf/scenario.py +++ b/tests/migration-stress/guestperf/scenario.py @@ -30,7 +30,7 @@ class Scenario(object): auto_converge=False, auto_converge_step=10, compression_mt=False, compression_mt_threads=1, compression_xbzrle=False, compression_xbzrle_cache=10, - multifd=False, multifd_channels=2, + multifd=False, multifd_channels=2, multifd_compression="", dirty_limit=False, x_vcpu_dirty_limit_period=500, vcpu_dirty_limit=1): @@ -61,6 +61,7 @@ class Scenario(object): self._multifd = multifd self._multifd_channels = multifd_channels + self._multifd_compression = multifd_compression self._dirty_limit = dirty_limit self._x_vcpu_dirty_limit_period = x_vcpu_dirty_limit_period @@ -85,6 +86,7 @@ class Scenario(object): "compression_xbzrle_cache": self._compression_xbzrle_cache, "multifd": self._multifd, "multifd_channels": self._multifd_channels, + "multifd_compression": self._multifd_compression, "dirty_limit": self._dirty_limit, "x_vcpu_dirty_limit_period": self._x_vcpu_dirty_limit_period, "vcpu_dirty_limit": self._vcpu_dirty_limit, @@ -109,4 +111,5 @@ class Scenario(object): data["compression_xbzrle"], data["compression_xbzrle_cache"], data["multifd"], - data["multifd_channels"]) + data["multifd_channels"], + data["multifd_compression"]) diff --git a/tests/migration-stress/guestperf/shell.py b/tests/migration-stress/guestperf/shell.py index 046afeb84e..63bbe3226c 100644 --- a/tests/migration-stress/guestperf/shell.py +++ b/tests/migration-stress/guestperf/shell.py @@ -131,6 +131,8 @@ class Shell(BaseShell): action="store_true") parser.add_argument("--multifd-channels", dest="multifd_channels", default=2, type=int) + parser.add_argument("--multifd-compression", dest="multifd_compression", + default="") parser.add_argument("--dirty-limit", dest="dirty_limit", default=False, action="store_true") @@ -167,6 +169,7 @@ class Shell(BaseShell): multifd=args.multifd, multifd_channels=args.multifd_channels, + multifd_compression=args.multifd_compression, dirty_limit=args.dirty_limit, x_vcpu_dirty_limit_period=\ From 5984870e02aa6cf471bc9225ae91640b544b31c8 Mon Sep 17 00:00:00 2001 From: Hyman Huang Date: Fri, 14 Feb 2025 18:55:26 +0800 Subject: [PATCH 22/22] guestperf: Add test result data into report The migration result data is not included in the guestperf report information; include the result as a report entry so the developer can check whether the migration was successful after running guestperf. Signed-off-by: Hyman Huang Message-ID: <6303400c2983ffe5647f07caa6406f00ceae4581.1739530098.git.yong.huang@smartx.com> Signed-off-by: Fabiano Rosas --- tests/migration-stress/guestperf/engine.py | 10 ++++++++-- tests/migration-stress/guestperf/report.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/migration-stress/guestperf/engine.py b/tests/migration-stress/guestperf/engine.py index e11f6a8496..d8462db765 100644 --- a/tests/migration-stress/guestperf/engine.py +++ b/tests/migration-stress/guestperf/engine.py @@ -24,7 +24,7 @@ import sys import time from guestperf.progress import Progress, ProgressStats -from guestperf.report import Report +from guestperf.report import Report, ReportResult from guestperf.timings import TimingRecord, Timings sys.path.append(os.path.join(os.path.dirname(__file__), @@ -276,7 +276,11 @@ class Engine(object): src_vcpu_time.extend(self._vcpu_timing(src_pid, src_threads)) sleep_secs -= 1 - return [progress_history, src_qemu_time, src_vcpu_time] + result = ReportResult() + if progress._status == "completed" and not paused: + result = ReportResult(True) + + return [progress_history, src_qemu_time, src_vcpu_time, result] if self._verbose and (loop % 20) == 0: print("Iter %d: remain %5dMB of %5dMB (total %5dMB @ %5dMb/sec)" % ( @@ -490,6 +494,7 @@ class Engine(object): progress_history = ret[0] qemu_timings = ret[1] vcpu_timings = ret[2] + result = ret[3] if uri[0:5] == "unix:" and os.path.exists(uri[5:]): os.remove(uri[5:]) @@ -509,6 +514,7 @@ class Engine(object): Timings(self._get_timings(src) + self._get_timings(dst)), Timings(qemu_timings), Timings(vcpu_timings), + result, self._binary, self._dst_host, self._kernel, self._initrd, self._transport, self._sleep) except Exception as e: diff --git a/tests/migration-stress/guestperf/report.py b/tests/migration-stress/guestperf/report.py index 1efd40c868..e135e01be6 100644 --- a/tests/migration-stress/guestperf/report.py +++ b/tests/migration-stress/guestperf/report.py @@ -24,6 +24,22 @@ from guestperf.scenario import Scenario from guestperf.progress import Progress from guestperf.timings import Timings +class ReportResult(object): + + def __init__(self, success=False): + self._success = success + + def serialize(self): + return { + "success": self._success, + } + + @classmethod + def deserialize(cls, data): + return cls( + data["success"]) + + class Report(object): def __init__(self, @@ -33,6 +49,7 @@ class Report(object): guest_timings, qemu_timings, vcpu_timings, + result, binary, dst_host, kernel, @@ -46,6 +63,7 @@ class Report(object): self._guest_timings = guest_timings self._qemu_timings = qemu_timings self._vcpu_timings = vcpu_timings + self._result = result self._binary = binary self._dst_host = dst_host self._kernel = kernel @@ -61,6 +79,7 @@ class Report(object): "guest_timings": self._guest_timings.serialize(), "qemu_timings": self._qemu_timings.serialize(), "vcpu_timings": self._vcpu_timings.serialize(), + "result": self._result.serialize(), "binary": self._binary, "dst_host": self._dst_host, "kernel": self._kernel, @@ -78,6 +97,7 @@ class Report(object): Timings.deserialize(data["guest_timings"]), Timings.deserialize(data["qemu_timings"]), Timings.deserialize(data["vcpu_timings"]), + ReportResult.deserialize(data["result"]), data["binary"], data["dst_host"], data["kernel"],