Block patches

- Fix for file-posix's zoning code crashing on I/O errors
 - Throttling refactoring
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmTxnMISHGhyZWl0ekBy
 ZWRoYXQuY29tAAoJEKH6QNCYAZzfYkUP+gMG9hhzvgjj/tw9rEBQjciihzcQmqQJ
 2Mm37RH2jj5bnnTdaTbMkcRRwVhncYSCwK9q5EYVbZmU9C/v4YJmsSEQlcl7wVou
 hbPUv6NHaBrJZX9nxNSa2RHui6pZMLKa/D0rJVB7NjYBrrRtiPo7kiLVQYjYXa2g
 kcCCfY4t3Z2RxOP31mMXRjYlhJE9bIuZdTEndrKme8KS2JGPZEJ9xjkoW1tj96EX
 oc/Cg2vk7AEtsFYA0bcD8fTFkBDJEwyYl3usu7Tk24pvH16jk7wFSqRVSsDMfnER
 tG8X3mHLIY0hbSkpzdHJdXINvZ6FWpQb0CGzIKr+pMiuWVdWr1HglBr0m4pVF+Y4
 A6AI6VX2JJgtacypoDyCZC9mzs1jIdeiwq9v5dyuikJ6ivTwEEoeoSLnLTN3AjXn
 0mtQYzgCg5Gd6+rTo7XjSO9SSlbaVrDl/B2eXle6tmIFT5k+86fh0hc+zTmP8Rkw
 Knbc+5Le95wlMrOUNx2GhXrTGwX510hLxKboho/LITxtAzqvXnEJKrYbnkm3WPnw
 wfHnR5VQH1NKEpiH/p33og6OV/vu9e7vgp0ZNZV136SnzC90C1zMUwg2simJW701
 34EtN0XBX8XBKrxfe7KscV9kRE8wrWWJVbhp+WOcQEomGI8uraxzWqDIk/v7NZXv
 m4XBscaB+Iri
 =oKgk
 -----END PGP SIGNATURE-----

Merge tag 'pull-block-2023-09-01' of https://gitlab.com/hreitz/qemu into staging

Block patches

- Fix for file-posix's zoning code crashing on I/O errors
- Throttling refactoring

# -----BEGIN PGP SIGNATURE-----
#
# iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmTxnMISHGhyZWl0ekBy
# ZWRoYXQuY29tAAoJEKH6QNCYAZzfYkUP+gMG9hhzvgjj/tw9rEBQjciihzcQmqQJ
# 2Mm37RH2jj5bnnTdaTbMkcRRwVhncYSCwK9q5EYVbZmU9C/v4YJmsSEQlcl7wVou
# hbPUv6NHaBrJZX9nxNSa2RHui6pZMLKa/D0rJVB7NjYBrrRtiPo7kiLVQYjYXa2g
# kcCCfY4t3Z2RxOP31mMXRjYlhJE9bIuZdTEndrKme8KS2JGPZEJ9xjkoW1tj96EX
# oc/Cg2vk7AEtsFYA0bcD8fTFkBDJEwyYl3usu7Tk24pvH16jk7wFSqRVSsDMfnER
# tG8X3mHLIY0hbSkpzdHJdXINvZ6FWpQb0CGzIKr+pMiuWVdWr1HglBr0m4pVF+Y4
# A6AI6VX2JJgtacypoDyCZC9mzs1jIdeiwq9v5dyuikJ6ivTwEEoeoSLnLTN3AjXn
# 0mtQYzgCg5Gd6+rTo7XjSO9SSlbaVrDl/B2eXle6tmIFT5k+86fh0hc+zTmP8Rkw
# Knbc+5Le95wlMrOUNx2GhXrTGwX510hLxKboho/LITxtAzqvXnEJKrYbnkm3WPnw
# wfHnR5VQH1NKEpiH/p33og6OV/vu9e7vgp0ZNZV136SnzC90C1zMUwg2simJW701
# 34EtN0XBX8XBKrxfe7KscV9kRE8wrWWJVbhp+WOcQEomGI8uraxzWqDIk/v7NZXv
# m4XBscaB+Iri
# =oKgk
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 01 Sep 2023 04:11:46 EDT
# gpg:                using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF
# gpg:                issuer "hreitz@redhat.com"
# gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00  4D34 A1FA 40D0 9801 9CDF

* tag 'pull-block-2023-09-01' of https://gitlab.com/hreitz/qemu:
  tests/file-io-error: New test
  file-posix: Simplify raw_co_prw's 'out' zone code
  file-posix: Fix zone update in I/O error path
  file-posix: Check bs->bl.zoned for zone info
  file-posix: Clear bs->bl.zoned on error
  block/throttle-groups: Use ThrottleDirection instead of bool is_write
  fsdev: Use ThrottleDirection instead of bool is_write
  throttle: use THROTTLE_MAX/ARRAY_SIZE instead of hard-coded values
  throttle: use enum ThrottleDirection instead of bool is_write
  cryptodev: use NULL throttle timer cb for read direction
  test-throttle: test read only and write only
  throttle: support read-only and write-only
  test-throttle: use enum ThrottleDirection
  throttle: introduce enum ThrottleDirection

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2023-09-21 09:05:09 -04:00
commit 416af8564f
14 changed files with 418 additions and 171 deletions

View File

@ -252,10 +252,11 @@ static void cryptodev_backend_throttle_timer_cb(void *opaque)
continue; continue;
} }
throttle_account(&backend->ts, true, ret); throttle_account(&backend->ts, THROTTLE_WRITE, ret);
cryptodev_backend_operation(backend, op_info); cryptodev_backend_operation(backend, op_info);
if (throttle_enabled(&backend->tc) && if (throttle_enabled(&backend->tc) &&
throttle_schedule_timer(&backend->ts, &backend->tt, true)) { throttle_schedule_timer(&backend->ts, &backend->tt,
THROTTLE_WRITE)) {
break; break;
} }
} }
@ -271,7 +272,7 @@ int cryptodev_backend_crypto_operation(
goto do_account; goto do_account;
} }
if (throttle_schedule_timer(&backend->ts, &backend->tt, true) || if (throttle_schedule_timer(&backend->ts, &backend->tt, THROTTLE_WRITE) ||
!QTAILQ_EMPTY(&backend->opinfos)) { !QTAILQ_EMPTY(&backend->opinfos)) {
QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next); QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next);
return 0; return 0;
@ -283,7 +284,7 @@ do_account:
return ret; return ret;
} }
throttle_account(&backend->ts, true, ret); throttle_account(&backend->ts, THROTTLE_WRITE, ret);
return cryptodev_backend_operation(backend, op_info); return cryptodev_backend_operation(backend, op_info);
} }
@ -341,8 +342,7 @@ static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field,
if (!enabled) { if (!enabled) {
throttle_init(&backend->ts); throttle_init(&backend->ts);
throttle_timers_init(&backend->tt, qemu_get_aio_context(), throttle_timers_init(&backend->tt, qemu_get_aio_context(),
QEMU_CLOCK_REALTIME, QEMU_CLOCK_REALTIME, NULL,
cryptodev_backend_throttle_timer_cb, /* FIXME */
cryptodev_backend_throttle_timer_cb, backend); cryptodev_backend_throttle_timer_cb, backend);
} }

View File

@ -1341,7 +1341,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
/* throttling disk I/O */ /* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) { if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
bytes, false); bytes, THROTTLE_READ);
} }
ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset, ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
@ -1415,7 +1415,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
/* throttling disk I/O */ /* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) { if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member, throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
bytes, true); bytes, THROTTLE_WRITE);
} }
if (!blk->enable_write_cache) { if (!blk->enable_write_cache) {

View File

@ -1412,11 +1412,9 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
BlockZoneModel zoned; BlockZoneModel zoned;
int ret; int ret;
bs->bl.zoned = BLK_Z_NONE;
ret = get_sysfs_zoned_model(st, &zoned); ret = get_sysfs_zoned_model(st, &zoned);
if (ret < 0 || zoned == BLK_Z_NONE) { if (ret < 0 || zoned == BLK_Z_NONE) {
return; goto no_zoned;
} }
bs->bl.zoned = zoned; bs->bl.zoned = zoned;
@ -1437,10 +1435,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
if (ret < 0) { if (ret < 0) {
error_setg_errno(errp, -ret, "Unable to read chunk_sectors " error_setg_errno(errp, -ret, "Unable to read chunk_sectors "
"sysfs attribute"); "sysfs attribute");
return; goto no_zoned;
} else if (!ret) { } else if (!ret) {
error_setg(errp, "Read 0 from chunk_sectors sysfs attribute"); error_setg(errp, "Read 0 from chunk_sectors sysfs attribute");
return; goto no_zoned;
} }
bs->bl.zone_size = ret << BDRV_SECTOR_BITS; bs->bl.zone_size = ret << BDRV_SECTOR_BITS;
@ -1448,10 +1446,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
if (ret < 0) { if (ret < 0) {
error_setg_errno(errp, -ret, "Unable to read nr_zones " error_setg_errno(errp, -ret, "Unable to read nr_zones "
"sysfs attribute"); "sysfs attribute");
return; goto no_zoned;
} else if (!ret) { } else if (!ret) {
error_setg(errp, "Read 0 from nr_zones sysfs attribute"); error_setg(errp, "Read 0 from nr_zones sysfs attribute");
return; goto no_zoned;
} }
bs->bl.nr_zones = ret; bs->bl.nr_zones = ret;
@ -1472,10 +1470,15 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0); ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0);
if (ret < 0) { if (ret < 0) {
error_setg_errno(errp, -ret, "report wps failed"); error_setg_errno(errp, -ret, "report wps failed");
bs->wps = NULL; goto no_zoned;
return;
} }
qemu_co_mutex_init(&bs->wps->colock); qemu_co_mutex_init(&bs->wps->colock);
return;
no_zoned:
bs->bl.zoned = BLK_Z_NONE;
g_free(bs->wps);
bs->wps = NULL;
} }
#else /* !defined(CONFIG_BLKZONED) */ #else /* !defined(CONFIG_BLKZONED) */
static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
@ -2452,9 +2455,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
if (fd_open(bs) < 0) if (fd_open(bs) < 0)
return -EIO; return -EIO;
#if defined(CONFIG_BLKZONED) #if defined(CONFIG_BLKZONED)
if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) { if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
bs->bl.zoned != BLK_Z_NONE) {
qemu_co_mutex_lock(&bs->wps->colock); qemu_co_mutex_lock(&bs->wps->colock);
if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) { if (type & QEMU_AIO_ZONE_APPEND) {
int index = offset / bs->bl.zone_size; int index = offset / bs->bl.zone_size;
offset = bs->wps->wp[index]; offset = bs->wps->wp[index];
} }
@ -2502,11 +2506,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
out: out:
#if defined(CONFIG_BLKZONED) #if defined(CONFIG_BLKZONED)
{ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
bs->bl.zoned != BLK_Z_NONE) {
BlockZoneWps *wps = bs->wps; BlockZoneWps *wps = bs->wps;
if (ret == 0) { if (ret == 0) {
if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
&& wps && bs->bl.zone_size) {
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
if (!BDRV_ZT_IS_CONV(*wp)) { if (!BDRV_ZT_IS_CONV(*wp)) {
if (type & QEMU_AIO_ZONE_APPEND) { if (type & QEMU_AIO_ZONE_APPEND) {
@ -2519,17 +2522,12 @@ out:
*wp = offset + bytes; *wp = offset + bytes;
} }
} }
}
} else { } else {
if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
update_zones_wp(bs, s->fd, 0, 1); update_zones_wp(bs, s->fd, 0, 1);
} }
}
if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
qemu_co_mutex_unlock(&wps->colock); qemu_co_mutex_unlock(&wps->colock);
} }
}
#endif #endif
return ret; return ret;
} }

View File

@ -37,7 +37,7 @@
static void throttle_group_obj_init(Object *obj); static void throttle_group_obj_init(Object *obj);
static void throttle_group_obj_complete(UserCreatable *obj, Error **errp); static void throttle_group_obj_complete(UserCreatable *obj, Error **errp);
static void timer_cb(ThrottleGroupMember *tgm, bool is_write); static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction);
/* The ThrottleGroup structure (with its ThrottleState) is shared /* The ThrottleGroup structure (with its ThrottleState) is shared
* among different ThrottleGroupMembers and it's independent from * among different ThrottleGroupMembers and it's independent from
@ -73,8 +73,8 @@ struct ThrottleGroup {
QemuMutex lock; /* This lock protects the following four fields */ QemuMutex lock; /* This lock protects the following four fields */
ThrottleState ts; ThrottleState ts;
QLIST_HEAD(, ThrottleGroupMember) head; QLIST_HEAD(, ThrottleGroupMember) head;
ThrottleGroupMember *tokens[2]; ThrottleGroupMember *tokens[THROTTLE_MAX];
bool any_timer_armed[2]; bool any_timer_armed[THROTTLE_MAX];
QEMUClockType clock_type; QEMUClockType clock_type;
/* This field is protected by the global QEMU mutex */ /* This field is protected by the global QEMU mutex */
@ -197,13 +197,13 @@ static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm)
* This assumes that tg->lock is held. * This assumes that tg->lock is held.
* *
* @tgm: the ThrottleGroupMember * @tgm: the ThrottleGroupMember
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
* @ret: whether the ThrottleGroupMember has pending requests. * @ret: whether the ThrottleGroupMember has pending requests.
*/ */
static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
bool is_write) ThrottleDirection direction)
{ {
return tgm->pending_reqs[is_write]; return tgm->pending_reqs[direction];
} }
/* Return the next ThrottleGroupMember in the round-robin sequence with pending /* Return the next ThrottleGroupMember in the round-robin sequence with pending
@ -212,12 +212,12 @@ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
* This assumes that tg->lock is held. * This assumes that tg->lock is held.
* *
* @tgm: the current ThrottleGroupMember * @tgm: the current ThrottleGroupMember
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
* @ret: the next ThrottleGroupMember with pending requests, or tgm if * @ret: the next ThrottleGroupMember with pending requests, or tgm if
* there is none. * there is none.
*/ */
static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
bool is_write) ThrottleDirection direction)
{ {
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@ -227,16 +227,16 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
* it's being drained. Skip the round-robin search and return tgm * it's being drained. Skip the round-robin search and return tgm
* immediately if it has pending requests. Otherwise we could be * immediately if it has pending requests. Otherwise we could be
* forcing it to wait for other member's throttled requests. */ * forcing it to wait for other member's throttled requests. */
if (tgm_has_pending_reqs(tgm, is_write) && if (tgm_has_pending_reqs(tgm, direction) &&
qatomic_read(&tgm->io_limits_disabled)) { qatomic_read(&tgm->io_limits_disabled)) {
return tgm; return tgm;
} }
start = token = tg->tokens[is_write]; start = token = tg->tokens[direction];
/* get next bs round in round robin style */ /* get next bs round in round robin style */
token = throttle_group_next_tgm(token); token = throttle_group_next_tgm(token);
while (token != start && !tgm_has_pending_reqs(token, is_write)) { while (token != start && !tgm_has_pending_reqs(token, direction)) {
token = throttle_group_next_tgm(token); token = throttle_group_next_tgm(token);
} }
@ -244,12 +244,12 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
* then decide the token is the current tgm because chances are * then decide the token is the current tgm because chances are
* the current tgm got the current request queued. * the current tgm got the current request queued.
*/ */
if (token == start && !tgm_has_pending_reqs(token, is_write)) { if (token == start && !tgm_has_pending_reqs(token, direction)) {
token = tgm; token = tgm;
} }
/* Either we return the original TGM, or one with pending requests */ /* Either we return the original TGM, or one with pending requests */
assert(token == tgm || tgm_has_pending_reqs(token, is_write)); assert(token == tgm || tgm_has_pending_reqs(token, direction));
return token; return token;
} }
@ -261,11 +261,11 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
* This assumes that tg->lock is held. * This assumes that tg->lock is held.
* *
* @tgm: the current ThrottleGroupMember * @tgm: the current ThrottleGroupMember
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
* @ret: whether the I/O request needs to be throttled or not * @ret: whether the I/O request needs to be throttled or not
*/ */
static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
bool is_write) ThrottleDirection direction)
{ {
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@ -277,16 +277,16 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
} }
/* Check if any of the timers in this group is already armed */ /* Check if any of the timers in this group is already armed */
if (tg->any_timer_armed[is_write]) { if (tg->any_timer_armed[direction]) {
return true; return true;
} }
must_wait = throttle_schedule_timer(ts, tt, is_write); must_wait = throttle_schedule_timer(ts, tt, direction);
/* If a timer just got armed, set tgm as the current token */ /* If a timer just got armed, set tgm as the current token */
if (must_wait) { if (must_wait) {
tg->tokens[is_write] = tgm; tg->tokens[direction] = tgm;
tg->any_timer_armed[is_write] = true; tg->any_timer_armed[direction] = true;
} }
return must_wait; return must_wait;
@ -296,15 +296,15 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
* any request was actually pending. * any request was actually pending.
* *
* @tgm: the current ThrottleGroupMember * @tgm: the current ThrottleGroupMember
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
*/ */
static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm, static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm,
bool is_write) ThrottleDirection direction)
{ {
bool ret; bool ret;
qemu_co_mutex_lock(&tgm->throttled_reqs_lock); qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]); ret = qemu_co_queue_next(&tgm->throttled_reqs[direction]);
qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
return ret; return ret;
@ -315,9 +315,10 @@ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tg
* This assumes that tg->lock is held. * This assumes that tg->lock is held.
* *
* @tgm: the current ThrottleGroupMember * @tgm: the current ThrottleGroupMember
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
*/ */
static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) static void schedule_next_request(ThrottleGroupMember *tgm,
ThrottleDirection direction)
{ {
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@ -325,27 +326,27 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
ThrottleGroupMember *token; ThrottleGroupMember *token;
/* Check if there's any pending request to schedule next */ /* Check if there's any pending request to schedule next */
token = next_throttle_token(tgm, is_write); token = next_throttle_token(tgm, direction);
if (!tgm_has_pending_reqs(token, is_write)) { if (!tgm_has_pending_reqs(token, direction)) {
return; return;
} }
/* Set a timer for the request if it needs to be throttled */ /* Set a timer for the request if it needs to be throttled */
must_wait = throttle_group_schedule_timer(token, is_write); must_wait = throttle_group_schedule_timer(token, direction);
/* If it doesn't have to wait, queue it for immediate execution */ /* If it doesn't have to wait, queue it for immediate execution */
if (!must_wait) { if (!must_wait) {
/* Give preference to requests from the current tgm */ /* Give preference to requests from the current tgm */
if (qemu_in_coroutine() && if (qemu_in_coroutine() &&
throttle_group_co_restart_queue(tgm, is_write)) { throttle_group_co_restart_queue(tgm, direction)) {
token = tgm; token = tgm;
} else { } else {
ThrottleTimers *tt = &token->throttle_timers; ThrottleTimers *tt = &token->throttle_timers;
int64_t now = qemu_clock_get_ns(tg->clock_type); int64_t now = qemu_clock_get_ns(tg->clock_type);
timer_mod(tt->timers[is_write], now); timer_mod(tt->timers[direction], now);
tg->any_timer_armed[is_write] = true; tg->any_timer_armed[direction] = true;
} }
tg->tokens[is_write] = token; tg->tokens[direction] = token;
} }
} }
@ -355,48 +356,49 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
* *
* @tgm: the current ThrottleGroupMember * @tgm: the current ThrottleGroupMember
* @bytes: the number of bytes for this I/O * @bytes: the number of bytes for this I/O
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
*/ */
void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
int64_t bytes, int64_t bytes,
bool is_write) ThrottleDirection direction)
{ {
bool must_wait; bool must_wait;
ThrottleGroupMember *token; ThrottleGroupMember *token;
ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
assert(bytes >= 0); assert(bytes >= 0);
assert(direction < THROTTLE_MAX);
qemu_mutex_lock(&tg->lock); qemu_mutex_lock(&tg->lock);
/* First we check if this I/O has to be throttled. */ /* First we check if this I/O has to be throttled. */
token = next_throttle_token(tgm, is_write); token = next_throttle_token(tgm, direction);
must_wait = throttle_group_schedule_timer(token, is_write); must_wait = throttle_group_schedule_timer(token, direction);
/* Wait if there's a timer set or queued requests of this type */ /* Wait if there's a timer set or queued requests of this type */
if (must_wait || tgm->pending_reqs[is_write]) { if (must_wait || tgm->pending_reqs[direction]) {
tgm->pending_reqs[is_write]++; tgm->pending_reqs[direction]++;
qemu_mutex_unlock(&tg->lock); qemu_mutex_unlock(&tg->lock);
qemu_co_mutex_lock(&tgm->throttled_reqs_lock); qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
qemu_co_queue_wait(&tgm->throttled_reqs[is_write], qemu_co_queue_wait(&tgm->throttled_reqs[direction],
&tgm->throttled_reqs_lock); &tgm->throttled_reqs_lock);
qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
qemu_mutex_lock(&tg->lock); qemu_mutex_lock(&tg->lock);
tgm->pending_reqs[is_write]--; tgm->pending_reqs[direction]--;
} }
/* The I/O will be executed, so do the accounting */ /* The I/O will be executed, so do the accounting */
throttle_account(tgm->throttle_state, is_write, bytes); throttle_account(tgm->throttle_state, direction, bytes);
/* Schedule the next request */ /* Schedule the next request */
schedule_next_request(tgm, is_write); schedule_next_request(tgm, direction);
qemu_mutex_unlock(&tg->lock); qemu_mutex_unlock(&tg->lock);
} }
typedef struct { typedef struct {
ThrottleGroupMember *tgm; ThrottleGroupMember *tgm;
bool is_write; ThrottleDirection direction;
} RestartData; } RestartData;
static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
@ -405,16 +407,16 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
ThrottleGroupMember *tgm = data->tgm; ThrottleGroupMember *tgm = data->tgm;
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool is_write = data->is_write; ThrottleDirection direction = data->direction;
bool empty_queue; bool empty_queue;
empty_queue = !throttle_group_co_restart_queue(tgm, is_write); empty_queue = !throttle_group_co_restart_queue(tgm, direction);
/* If the request queue was empty then we have to take care of /* If the request queue was empty then we have to take care of
* scheduling the next one */ * scheduling the next one */
if (empty_queue) { if (empty_queue) {
qemu_mutex_lock(&tg->lock); qemu_mutex_lock(&tg->lock);
schedule_next_request(tgm, is_write); schedule_next_request(tgm, direction);
qemu_mutex_unlock(&tg->lock); qemu_mutex_unlock(&tg->lock);
} }
@ -424,18 +426,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
aio_wait_kick(); aio_wait_kick();
} }
static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) static void throttle_group_restart_queue(ThrottleGroupMember *tgm,
ThrottleDirection direction)
{ {
Coroutine *co; Coroutine *co;
RestartData *rd = g_new0(RestartData, 1); RestartData *rd = g_new0(RestartData, 1);
rd->tgm = tgm; rd->tgm = tgm;
rd->is_write = is_write; rd->direction = direction;
/* This function is called when a timer is fired or when /* This function is called when a timer is fired or when
* throttle_group_restart_tgm() is called. Either way, there can * throttle_group_restart_tgm() is called. Either way, there can
* be no timer pending on this tgm at this point */ * be no timer pending on this tgm at this point */
assert(!timer_pending(tgm->throttle_timers.timers[is_write])); assert(!timer_pending(tgm->throttle_timers.timers[direction]));
qatomic_inc(&tgm->restart_pending); qatomic_inc(&tgm->restart_pending);
@ -445,18 +448,18 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write
void throttle_group_restart_tgm(ThrottleGroupMember *tgm) void throttle_group_restart_tgm(ThrottleGroupMember *tgm)
{ {
int i; ThrottleDirection dir;
if (tgm->throttle_state) { if (tgm->throttle_state) {
for (i = 0; i < 2; i++) { for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
QEMUTimer *t = tgm->throttle_timers.timers[i]; QEMUTimer *t = tgm->throttle_timers.timers[dir];
if (timer_pending(t)) { if (timer_pending(t)) {
/* If there's a pending timer on this tgm, fire it now */ /* If there's a pending timer on this tgm, fire it now */
timer_del(t); timer_del(t);
timer_cb(tgm, i); timer_cb(tgm, dir);
} else { } else {
/* Else run the next request from the queue manually */ /* Else run the next request from the queue manually */
throttle_group_restart_queue(tgm, i); throttle_group_restart_queue(tgm, dir);
} }
} }
} }
@ -500,30 +503,30 @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg)
* because it had been throttled. * because it had been throttled.
* *
* @tgm: the ThrottleGroupMember whose request had been throttled * @tgm: the ThrottleGroupMember whose request had been throttled
* @is_write: the type of operation (read/write) * @direction: the ThrottleDirection
*/ */
static void timer_cb(ThrottleGroupMember *tgm, bool is_write) static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction)
{ {
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
/* The timer has just been fired, so we can update the flag */ /* The timer has just been fired, so we can update the flag */
qemu_mutex_lock(&tg->lock); qemu_mutex_lock(&tg->lock);
tg->any_timer_armed[is_write] = false; tg->any_timer_armed[direction] = false;
qemu_mutex_unlock(&tg->lock); qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */ /* Run the request that was waiting for this timer */
throttle_group_restart_queue(tgm, is_write); throttle_group_restart_queue(tgm, direction);
} }
static void read_timer_cb(void *opaque) static void read_timer_cb(void *opaque)
{ {
timer_cb(opaque, false); timer_cb(opaque, THROTTLE_READ);
} }
static void write_timer_cb(void *opaque) static void write_timer_cb(void *opaque)
{ {
timer_cb(opaque, true); timer_cb(opaque, THROTTLE_WRITE);
} }
/* Register a ThrottleGroupMember from the throttling group, also initializing /* Register a ThrottleGroupMember from the throttling group, also initializing
@ -541,7 +544,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
const char *groupname, const char *groupname,
AioContext *ctx) AioContext *ctx)
{ {
int i; ThrottleDirection dir;
ThrottleState *ts = throttle_group_incref(groupname); ThrottleState *ts = throttle_group_incref(groupname);
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@ -551,10 +554,11 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
QEMU_LOCK_GUARD(&tg->lock); QEMU_LOCK_GUARD(&tg->lock);
/* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
for (i = 0; i < 2; i++) { for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
if (!tg->tokens[i]) { if (!tg->tokens[dir]) {
tg->tokens[i] = tgm; tg->tokens[dir] = tgm;
} }
qemu_co_queue_init(&tgm->throttled_reqs[dir]);
} }
QLIST_INSERT_HEAD(&tg->head, tgm, round_robin); QLIST_INSERT_HEAD(&tg->head, tgm, round_robin);
@ -566,8 +570,6 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
write_timer_cb, write_timer_cb,
tgm); tgm);
qemu_co_mutex_init(&tgm->throttled_reqs_lock); qemu_co_mutex_init(&tgm->throttled_reqs_lock);
qemu_co_queue_init(&tgm->throttled_reqs[0]);
qemu_co_queue_init(&tgm->throttled_reqs[1]);
} }
/* Unregister a ThrottleGroupMember from its group, removing it from the list, /* Unregister a ThrottleGroupMember from its group, removing it from the list,
@ -585,7 +587,7 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
ThrottleState *ts = tgm->throttle_state; ThrottleState *ts = tgm->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
ThrottleGroupMember *token; ThrottleGroupMember *token;
int i; ThrottleDirection dir;
if (!ts) { if (!ts) {
/* Discard already unregistered tgm */ /* Discard already unregistered tgm */
@ -596,17 +598,17 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0); AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0);
WITH_QEMU_LOCK_GUARD(&tg->lock) { WITH_QEMU_LOCK_GUARD(&tg->lock) {
for (i = 0; i < 2; i++) { for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
assert(tgm->pending_reqs[i] == 0); assert(tgm->pending_reqs[dir] == 0);
assert(qemu_co_queue_empty(&tgm->throttled_reqs[i])); assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
assert(!timer_pending(tgm->throttle_timers.timers[i])); assert(!timer_pending(tgm->throttle_timers.timers[dir]));
if (tg->tokens[i] == tgm) { if (tg->tokens[dir] == tgm) {
token = throttle_group_next_tgm(tgm); token = throttle_group_next_tgm(tgm);
/* Take care of the case where this is the last tgm in the group */ /* Take care of the case where this is the last tgm in the group */
if (token == tgm) { if (token == tgm) {
token = NULL; token = NULL;
} }
tg->tokens[i] = token; tg->tokens[dir] = token;
} }
} }
@ -631,19 +633,20 @@ void throttle_group_detach_aio_context(ThrottleGroupMember *tgm)
{ {
ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
ThrottleTimers *tt = &tgm->throttle_timers; ThrottleTimers *tt = &tgm->throttle_timers;
int i; ThrottleDirection dir;
/* Requests must have been drained */ /* Requests must have been drained */
assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0); for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
assert(qemu_co_queue_empty(&tgm->throttled_reqs[0])); assert(tgm->pending_reqs[dir] == 0);
assert(qemu_co_queue_empty(&tgm->throttled_reqs[1])); assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
}
/* Kick off next ThrottleGroupMember, if necessary */ /* Kick off next ThrottleGroupMember, if necessary */
WITH_QEMU_LOCK_GUARD(&tg->lock) { WITH_QEMU_LOCK_GUARD(&tg->lock) {
for (i = 0; i < 2; i++) { for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
if (timer_pending(tt->timers[i])) { if (timer_pending(tt->timers[dir])) {
tg->any_timer_armed[i] = false; tg->any_timer_armed[dir] = false;
schedule_next_request(tgm, i); schedule_next_request(tgm, dir);
} }
} }
} }

View File

@ -118,7 +118,7 @@ throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
{ {
ThrottleGroupMember *tgm = bs->opaque; ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, false); throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_READ);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
} }
@ -128,7 +128,7 @@ throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags) QEMUIOVector *qiov, BdrvRequestFlags flags)
{ {
ThrottleGroupMember *tgm = bs->opaque; ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true); throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
} }
@ -138,7 +138,7 @@ throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
BdrvRequestFlags flags) BdrvRequestFlags flags)
{ {
ThrottleGroupMember *tgm = bs->opaque; ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true); throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
} }
@ -147,7 +147,7 @@ static int coroutine_fn GRAPH_RDLOCK
throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{ {
ThrottleGroupMember *tgm = bs->opaque; ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true); throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
return bdrv_co_pdiscard(bs->file, offset, bytes); return bdrv_co_pdiscard(bs->file, offset, bytes);
} }

View File

@ -94,20 +94,22 @@ void fsdev_throttle_init(FsThrottle *fst)
} }
} }
void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst,
ThrottleDirection direction,
struct iovec *iov, int iovcnt) struct iovec *iov, int iovcnt)
{ {
assert(direction < THROTTLE_MAX);
if (throttle_enabled(&fst->cfg)) { if (throttle_enabled(&fst->cfg)) {
if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) || if (throttle_schedule_timer(&fst->ts, &fst->tt, direction) ||
!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { !qemu_co_queue_empty(&fst->throttled_reqs[direction])) {
qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); qemu_co_queue_wait(&fst->throttled_reqs[direction], NULL);
} }
throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt)); throttle_account(&fst->ts, direction, iov_size(iov, iovcnt));
if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && if (!qemu_co_queue_empty(&fst->throttled_reqs[direction]) &&
!throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) { !throttle_schedule_timer(&fst->ts, &fst->tt, direction)) {
qemu_co_queue_next(&fst->throttled_reqs[is_write]); qemu_co_queue_next(&fst->throttled_reqs[direction]);
} }
} }
} }

View File

@ -23,14 +23,14 @@ typedef struct FsThrottle {
ThrottleState ts; ThrottleState ts;
ThrottleTimers tt; ThrottleTimers tt;
ThrottleConfig cfg; ThrottleConfig cfg;
CoQueue throttled_reqs[2]; CoQueue throttled_reqs[THROTTLE_MAX];
} FsThrottle; } FsThrottle;
int fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **); int fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
void fsdev_throttle_init(FsThrottle *); void fsdev_throttle_init(FsThrottle *);
void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , void coroutine_fn fsdev_co_throttle_request(FsThrottle *, ThrottleDirection ,
struct iovec *, int); struct iovec *, int);
void fsdev_throttle_cleanup(FsThrottle *); void fsdev_throttle_cleanup(FsThrottle *);

View File

@ -252,7 +252,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
if (v9fs_request_cancelled(pdu)) { if (v9fs_request_cancelled(pdu)) {
return -EINTR; return -EINTR;
} }
fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt); fsdev_co_throttle_request(s->ctx.fst, THROTTLE_WRITE, iov, iovcnt);
v9fs_co_run_in_worker( v9fs_co_run_in_worker(
{ {
err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset); err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@ -272,7 +272,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
if (v9fs_request_cancelled(pdu)) { if (v9fs_request_cancelled(pdu)) {
return -EINTR; return -EINTR;
} }
fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt); fsdev_co_throttle_request(s->ctx.fst, THROTTLE_READ, iov, iovcnt);
v9fs_co_run_in_worker( v9fs_co_run_in_worker(
{ {
err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset); err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);

View File

@ -37,7 +37,7 @@ typedef struct ThrottleGroupMember {
AioContext *aio_context; AioContext *aio_context;
/* throttled_reqs_lock protects the CoQueues for throttled requests. */ /* throttled_reqs_lock protects the CoQueues for throttled requests. */
CoMutex throttled_reqs_lock; CoMutex throttled_reqs_lock;
CoQueue throttled_reqs[2]; CoQueue throttled_reqs[THROTTLE_MAX];
/* Nonzero if the I/O limits are currently being ignored; generally /* Nonzero if the I/O limits are currently being ignored; generally
* it is zero. Accessed with atomic operations. * it is zero. Accessed with atomic operations.
@ -54,7 +54,7 @@ typedef struct ThrottleGroupMember {
* throttle_state tells us if I/O limits are configured. */ * throttle_state tells us if I/O limits are configured. */
ThrottleState *throttle_state; ThrottleState *throttle_state;
ThrottleTimers throttle_timers; ThrottleTimers throttle_timers;
unsigned pending_reqs[2]; unsigned pending_reqs[THROTTLE_MAX];
QLIST_ENTRY(ThrottleGroupMember) round_robin; QLIST_ENTRY(ThrottleGroupMember) round_robin;
} ThrottleGroupMember; } ThrottleGroupMember;
@ -78,7 +78,7 @@ void throttle_group_restart_tgm(ThrottleGroupMember *tgm);
void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
int64_t bytes, int64_t bytes,
bool is_write); ThrottleDirection direction);
void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, void throttle_group_attach_aio_context(ThrottleGroupMember *tgm,
AioContext *new_context); AioContext *new_context);
void throttle_group_detach_aio_context(ThrottleGroupMember *tgm); void throttle_group_detach_aio_context(ThrottleGroupMember *tgm);

View File

@ -99,13 +99,18 @@ typedef struct ThrottleState {
int64_t previous_leak; /* timestamp of the last leak done */ int64_t previous_leak; /* timestamp of the last leak done */
} ThrottleState; } ThrottleState;
/*
 * Direction of a throttled request.  The values double as indices into the
 * per-direction arrays (timers, timer callbacks, request queues).
 */
typedef enum {
/* read requests; pinned to 0 so it is the first array index */
THROTTLE_READ = 0,
/* write requests */
THROTTLE_WRITE,
/* number of directions: used as array size and loop bound, not a direction */
THROTTLE_MAX
} ThrottleDirection;
typedef struct ThrottleTimers { typedef struct ThrottleTimers {
QEMUTimer *timers[2]; /* timers used to do the throttling */ QEMUTimer *timers[THROTTLE_MAX]; /* timers used to do the throttling */
QEMUClockType clock_type; /* the clock used */ QEMUClockType clock_type; /* the clock used */
/* Callbacks */ /* Callbacks */
QEMUTimerCB *read_timer_cb; QEMUTimerCB *timer_cb[THROTTLE_MAX];
QEMUTimerCB *write_timer_cb;
void *timer_opaque; void *timer_opaque;
} ThrottleTimers; } ThrottleTimers;
@ -149,9 +154,10 @@ void throttle_config_init(ThrottleConfig *cfg);
/* usage */ /* usage */
bool throttle_schedule_timer(ThrottleState *ts, bool throttle_schedule_timer(ThrottleState *ts,
ThrottleTimers *tt, ThrottleTimers *tt,
bool is_write); ThrottleDirection direction);
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); void throttle_account(ThrottleState *ts, ThrottleDirection direction,
uint64_t size);
void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg, void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
Error **errp); Error **errp);
void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var); void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var);

View File

@ -0,0 +1,119 @@
#!/usr/bin/env bash
# group: rw
#
# Produce an I/O error in file-posix, and hope that it is not catastrophic.
# Regression test for: https://bugzilla.redhat.com/show_bug.cgi?id=2234374
#
# Copyright (C) 2023 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Test name, taken from the script's own file name; it is printed as the
# first line of the test output.
seq=$(basename "$0")
echo "QA output created by $seq"
status=1 # failure is the default!
# Cleanup handler run from the trap below: calls _cleanup_qemu (not defined
# in this script; presumably provided by common.qemu -- confirm) and removes
# the file this test uses as FUSE mountpoint.
_cleanup()
{
_cleanup_qemu
rm -f "$TEST_DIR/fuse-export"
}
# Run _cleanup on normal exit (0) and on signals 1, 2, 3 and 15.  The escaped
# \$status is expanded when the trap fires, so the exit code is whatever value
# $status holds at that point.
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ../common.rc
. ../common.filter
. ../common.qemu
# Format-agnostic (we do not use any), but we do test the file protocol
_supported_proto file
_require_drivers blkdebug null-co
if [ "$IMGOPTSSYNTAX" = "true" ]; then
# We need `$QEMU_IO -f file` to work; IMGOPTSSYNTAX uses --image-opts,
# breaking -f.
_unsupported_fmt $IMGFMT
fi
# This is a regression test of a bug in which file-posix would access zone
# information in case of an I/O error even when there is no zone information,
# resulting in a division by zero.
# To reproduce the problem, we need to trigger an I/O error inside of
# file-posix, which can be done (rootless) by providing a FUSE export that
# presents only errors when accessed.
# Start QEMU and complete the QMP handshake.  _launch_qemu and _send_qemu_cmd
# are not defined in this script (presumably they come from common.qemu --
# confirm); the last argument of _send_qemu_cmd looks like the pattern to
# wait for in QEMU's reply -- confirm.
_launch_qemu
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'qmp_capabilities'}" \
'return'
# Create the node that will be exported: a blkdebug node with an inject-error
# rule, layered on top of a null-co image.
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'blockdev-add',
'arguments': {
'driver': 'blkdebug',
'node-name': 'node0',
'inject-error': [{'event': 'none'}],
'image': {
'driver': 'null-co'
}
}}" \
'return'
# FUSE mountpoint must exist and be a regular file
touch "$TEST_DIR/fuse-export"
# The grep -v to filter fusermount's (benign) error when /etc/fuse.conf does
# not contain user_allow_other and the subsequent check for missing FUSE support
# have both been taken from iotest 308.
output=$(_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'block-export-add',
'arguments': {
'id': 'exp0',
'type': 'fuse',
'node-name': 'node0',
'mountpoint': '$TEST_DIR/fuse-export',
'writable': true
}}" \
'return' \
| grep -v 'option allow_other only allowed if')
if echo "$output" | grep -q "Parameter 'type' does not accept value 'fuse'"; then
_notrun 'No FUSE support'
fi
# FUSE is available: emit the captured reply so it becomes part of the test
# output.
echo "$output"
echo
# This should fail, but gracefully, i.e. just print an I/O error, not crash.
$QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io
echo
# Tear down in reverse order: request deletion of the export, wait for the
# BLOCK_EXPORT_DELETED event (the command argument is empty here), then
# delete the exported node.
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'block-export-del',
'arguments': {'id': 'exp0'}}" \
'return'
_send_qemu_cmd $QEMU_HANDLE \
'' \
'BLOCK_EXPORT_DELETED'
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'blockdev-del',
'arguments': {'node-name': 'node0'}}" \
'return'
# success, all done
echo "*** done"
rm -f $seq.full
# Only reached when nothing above bailed out; this replaces the failure
# default assigned to status at the top of the script.
status=0

View File

@ -0,0 +1,33 @@
QA output created by file-io-error
{'execute': 'qmp_capabilities'}
{"return": {}}
{'execute': 'blockdev-add',
'arguments': {
'driver': 'blkdebug',
'node-name': 'node0',
'inject-error': [{'event': 'none'}],
'image': {
'driver': 'null-co'
}
}}
{"return": {}}
{'execute': 'block-export-add',
'arguments': {
'id': 'exp0',
'type': 'fuse',
'node-name': 'node0',
'mountpoint': 'TEST_DIR/fuse-export',
'writable': true
}}
{"return": {}}
write failed: Input/output error
{'execute': 'block-export-del',
'arguments': {'id': 'exp0'}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "exp0"}}
{'execute': 'blockdev-del',
'arguments': {'node-name': 'node0'}}
{"return": {}}
*** done

View File

@ -169,8 +169,72 @@ static void test_init(void)
/* check initialized fields */ /* check initialized fields */
g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL); g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
g_assert(tt->timers[0]); g_assert(tt->timers[THROTTLE_READ]);
g_assert(tt->timers[1]); g_assert(tt->timers[THROTTLE_WRITE]);
/* check other fields were cleared */
g_assert(!ts.previous_leak);
g_assert(!ts.cfg.op_size);
for (i = 0; i < BUCKETS_COUNT; i++) {
g_assert(!ts.cfg.buckets[i].avg);
g_assert(!ts.cfg.buckets[i].max);
g_assert(!ts.cfg.buckets[i].level);
}
throttle_timers_destroy(tt);
}
/*
 * Initialize the throttling structures with a read timer callback only and
 * check that a read timer exists while the write timer slot stays empty.
 */
static void test_init_readonly(void)
{
    int bucket = 0;

    tt = &tgm.throttle_timers;

    /* poison both structures so stale contents cannot hide a missing init */
    memset(tt, 1, sizeof(*tt));
    memset(&ts, 1, sizeof(ts));

    /* set everything up, passing no callback for the write direction */
    throttle_init(&ts);
    throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
                         read_timer_cb, NULL, &ts);

    /* the clock type is recorded and only the read timer is present */
    g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
    g_assert(tt->timers[THROTTLE_READ]);
    g_assert(!tt->timers[THROTTLE_WRITE]);

    /* the remaining throttle state fields must read as zero */
    g_assert(!ts.previous_leak);
    g_assert(!ts.cfg.op_size);
    while (bucket < BUCKETS_COUNT) {
        g_assert(!ts.cfg.buckets[bucket].avg);
        g_assert(!ts.cfg.buckets[bucket].max);
        g_assert(!ts.cfg.buckets[bucket].level);
        bucket++;
    }

    throttle_timers_destroy(tt);
}
static void test_init_writeonly(void)
{
int i;
tt = &tgm.throttle_timers;
/* fill the structures with crap */
memset(&ts, 1, sizeof(ts));
memset(tt, 1, sizeof(*tt));
/* init structures */
throttle_init(&ts);
throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
NULL, write_timer_cb, &ts);
/* check initialized fields */
g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
g_assert(!tt->timers[THROTTLE_READ]);
g_assert(tt->timers[THROTTLE_WRITE]);
/* check other fields where cleared */ /* check other fields where cleared */
g_assert(!ts.previous_leak); g_assert(!ts.previous_leak);
@ -191,7 +255,7 @@ static void test_destroy(void)
throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL, throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
read_timer_cb, write_timer_cb, &ts); read_timer_cb, write_timer_cb, &ts);
throttle_timers_destroy(tt); throttle_timers_destroy(tt);
for (i = 0; i < 2; i++) { for (i = 0; i < THROTTLE_MAX; i++) {
g_assert(!tt->timers[i]); g_assert(!tt->timers[i]);
} }
} }
@ -573,9 +637,9 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
throttle_config(&ts, QEMU_CLOCK_VIRTUAL, &cfg); throttle_config(&ts, QEMU_CLOCK_VIRTUAL, &cfg);
/* account a read */ /* account a read */
throttle_account(&ts, false, size); throttle_account(&ts, THROTTLE_READ, size);
/* account a write */ /* account a write */
throttle_account(&ts, true, size); throttle_account(&ts, THROTTLE_WRITE, size);
/* check total result */ /* check total result */
index = to_test[is_ops][0]; index = to_test[is_ops][0];
@ -752,6 +816,8 @@ int main(int argc, char **argv)
g_test_add_func("/throttle/leak_bucket", test_leak_bucket); g_test_add_func("/throttle/leak_bucket", test_leak_bucket);
g_test_add_func("/throttle/compute_wait", test_compute_wait); g_test_add_func("/throttle/compute_wait", test_compute_wait);
g_test_add_func("/throttle/init", test_init); g_test_add_func("/throttle/init", test_init);
g_test_add_func("/throttle/init_readonly", test_init_readonly);
g_test_add_func("/throttle/init_writeonly", test_init_writeonly);
g_test_add_func("/throttle/destroy", test_destroy); g_test_add_func("/throttle/destroy", test_destroy);
g_test_add_func("/throttle/have_timer", test_have_timer); g_test_add_func("/throttle/have_timer", test_have_timer);
g_test_add_func("/throttle/detach_attach", test_detach_attach); g_test_add_func("/throttle/detach_attach", test_detach_attach);

View File

@ -136,13 +136,14 @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
/* This function compute the time that must be waited while this IO /* This function compute the time that must be waited while this IO
* *
* @is_write: true if the current IO is a write, false if it's a read * @direction: throttle direction
* @ret: time to wait * @ret: time to wait
*/ */
static int64_t throttle_compute_wait_for(ThrottleState *ts, static int64_t throttle_compute_wait_for(ThrottleState *ts,
bool is_write) ThrottleDirection direction)
{ {
BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, static const BucketType to_check[THROTTLE_MAX][4] = {
{THROTTLE_BPS_TOTAL,
THROTTLE_OPS_TOTAL, THROTTLE_OPS_TOTAL,
THROTTLE_BPS_READ, THROTTLE_BPS_READ,
THROTTLE_OPS_READ}, THROTTLE_OPS_READ},
@ -153,8 +154,8 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
int64_t wait, max_wait = 0; int64_t wait, max_wait = 0;
int i; int i;
for (i = 0; i < 4; i++) { for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) {
BucketType index = to_check[is_write][i]; BucketType index = to_check[direction][i];
wait = throttle_compute_wait(&ts->cfg.buckets[index]); wait = throttle_compute_wait(&ts->cfg.buckets[index]);
if (wait > max_wait) { if (wait > max_wait) {
max_wait = wait; max_wait = wait;
@ -166,13 +167,13 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
/* compute the timer for this type of operation /* compute the timer for this type of operation
* *
* @is_write: the type of operation * @direction: throttle direction
* @now: the current clock timestamp * @now: the current clock timestamp
* @next_timestamp: the resulting timer * @next_timestamp: the resulting timer
* @ret: true if a timer must be set * @ret: true if a timer must be set
*/ */
static bool throttle_compute_timer(ThrottleState *ts, static bool throttle_compute_timer(ThrottleState *ts,
bool is_write, ThrottleDirection direction,
int64_t now, int64_t now,
int64_t *next_timestamp) int64_t *next_timestamp)
{ {
@ -182,7 +183,7 @@ static bool throttle_compute_timer(ThrottleState *ts,
throttle_do_leak(ts, now); throttle_do_leak(ts, now);
/* compute the wait time if any */ /* compute the wait time if any */
wait = throttle_compute_wait_for(ts, is_write); wait = throttle_compute_wait_for(ts, direction);
/* if the code must wait compute when the next timer should fire */ /* if the code must wait compute when the next timer should fire */
if (wait) { if (wait) {
@ -199,10 +200,15 @@ static bool throttle_compute_timer(ThrottleState *ts,
void throttle_timers_attach_aio_context(ThrottleTimers *tt, void throttle_timers_attach_aio_context(ThrottleTimers *tt,
AioContext *new_context) AioContext *new_context)
{ {
tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, ThrottleDirection dir;
tt->read_timer_cb, tt->timer_opaque);
tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
tt->write_timer_cb, tt->timer_opaque); if (tt->timer_cb[dir]) {
tt->timers[dir] =
aio_timer_new(new_context, tt->clock_type, SCALE_NS,
tt->timer_cb[dir], tt->timer_opaque);
}
}
} }
/* /*
@ -233,11 +239,12 @@ void throttle_timers_init(ThrottleTimers *tt,
QEMUTimerCB *write_timer_cb, QEMUTimerCB *write_timer_cb,
void *timer_opaque) void *timer_opaque)
{ {
assert(read_timer_cb || write_timer_cb);
memset(tt, 0, sizeof(ThrottleTimers)); memset(tt, 0, sizeof(ThrottleTimers));
tt->clock_type = clock_type; tt->clock_type = clock_type;
tt->read_timer_cb = read_timer_cb; tt->timer_cb[THROTTLE_READ] = read_timer_cb;
tt->write_timer_cb = write_timer_cb; tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
tt->timer_opaque = timer_opaque; tt->timer_opaque = timer_opaque;
throttle_timers_attach_aio_context(tt, aio_context); throttle_timers_attach_aio_context(tt, aio_context);
} }
@ -245,7 +252,9 @@ void throttle_timers_init(ThrottleTimers *tt,
/* destroy a timer */ /* destroy a timer */
static void throttle_timer_destroy(QEMUTimer **timer) static void throttle_timer_destroy(QEMUTimer **timer)
{ {
assert(*timer != NULL); if (*timer == NULL) {
return;
}
timer_free(*timer); timer_free(*timer);
*timer = NULL; *timer = NULL;
@ -254,10 +263,10 @@ static void throttle_timer_destroy(QEMUTimer **timer)
/* Remove timers from event loop */ /* Remove timers from event loop */
void throttle_timers_detach_aio_context(ThrottleTimers *tt) void throttle_timers_detach_aio_context(ThrottleTimers *tt)
{ {
int i; ThrottleDirection dir;
for (i = 0; i < 2; i++) { for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
throttle_timer_destroy(&tt->timers[i]); throttle_timer_destroy(&tt->timers[dir]);
} }
} }
@ -270,9 +279,13 @@ void throttle_timers_destroy(ThrottleTimers *tt)
/* is any throttling timer configured */ /* is any throttling timer configured */
bool throttle_timers_are_initialized(ThrottleTimers *tt) bool throttle_timers_are_initialized(ThrottleTimers *tt)
{ {
if (tt->timers[0]) { ThrottleDirection dir;
for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
if (tt->timers[dir]) {
return true; return true;
} }
}
return false; return false;
} }
@ -413,19 +426,24 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
* NOTE: this function is not unit tested due to it's usage of timer_mod * NOTE: this function is not unit tested due to it's usage of timer_mod
* *
* @tt: the timers structure * @tt: the timers structure
* @is_write: the type of operation (read/write) * @direction: throttle direction
* @ret: true if the timer has been scheduled else false * @ret: true if the timer has been scheduled else false
*/ */
bool throttle_schedule_timer(ThrottleState *ts, bool throttle_schedule_timer(ThrottleState *ts,
ThrottleTimers *tt, ThrottleTimers *tt,
bool is_write) ThrottleDirection direction)
{ {
int64_t now = qemu_clock_get_ns(tt->clock_type); int64_t now = qemu_clock_get_ns(tt->clock_type);
int64_t next_timestamp; int64_t next_timestamp;
QEMUTimer *timer;
bool must_wait; bool must_wait;
assert(direction < THROTTLE_MAX);
timer = tt->timers[direction];
assert(timer);
must_wait = throttle_compute_timer(ts, must_wait = throttle_compute_timer(ts,
is_write, direction,
now, now,
&next_timestamp); &next_timestamp);
@ -435,48 +453,50 @@ bool throttle_schedule_timer(ThrottleState *ts,
} }
/* request throttled and timer pending -> do nothing */ /* request throttled and timer pending -> do nothing */
if (timer_pending(tt->timers[is_write])) { if (timer_pending(timer)) {
return true; return true;
} }
/* request throttled and timer not pending -> arm timer */ /* request throttled and timer not pending -> arm timer */
timer_mod(tt->timers[is_write], next_timestamp); timer_mod(timer, next_timestamp);
return true; return true;
} }
/* do the accounting for this operation /* do the accounting for this operation
* *
* @is_write: the type of operation (read/write) * @direction: throttle direction
* @size: the size of the operation * @size: the size of the operation
*/ */
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) void throttle_account(ThrottleState *ts, ThrottleDirection direction,
uint64_t size)
{ {
const BucketType bucket_types_size[2][2] = { static const BucketType bucket_types_size[THROTTLE_MAX][2] = {
{ THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
{ THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE } { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
}; };
const BucketType bucket_types_units[2][2] = { static const BucketType bucket_types_units[THROTTLE_MAX][2] = {
{ THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ }, { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
{ THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE } { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
}; };
double units = 1.0; double units = 1.0;
unsigned i; unsigned i;
assert(direction < THROTTLE_MAX);
/* if cfg.op_size is defined and smaller than size we compute unit count */ /* if cfg.op_size is defined and smaller than size we compute unit count */
if (ts->cfg.op_size && size > ts->cfg.op_size) { if (ts->cfg.op_size && size > ts->cfg.op_size) {
units = (double) size / ts->cfg.op_size; units = (double) size / ts->cfg.op_size;
} }
for (i = 0; i < 2; i++) { for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) {
LeakyBucket *bkt; LeakyBucket *bkt;
bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]]; bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
bkt->level += size; bkt->level += size;
if (bkt->burst_length > 1) { if (bkt->burst_length > 1) {
bkt->burst_level += size; bkt->burst_level += size;
} }
bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]]; bkt = &ts->cfg.buckets[bucket_types_units[direction][i]];
bkt->level += units; bkt->level += units;
if (bkt->burst_length > 1) { if (bkt->burst_length > 1) {
bkt->burst_level += units; bkt->burst_level += units;