From 7875efb9f641ed0e79320bf258ee69cd0bf03716 Mon Sep 17 00:00:00 2001
From: Olaf Hering
Date: Wed, 23 Nov 2016 10:39:12 +0000
Subject: [PATCH 1/4] xen_disk: split discard input to match internal
 representation

The guest sends discard requests as u64 sector/count pairs, but the
block layer operates internally with s64/s32 pairs. The conversion
leads to I/O errors in the guest; the discard request is not processed.

  domU.cfg:
  'vdev=xvda, format=qcow2, backendtype=qdisk, target=/x.qcow2'

  domU:
  mkfs.ext4 -F /dev/xvda
  Discarding device blocks: failed - Input/output error

Fix this by splitting the request into chunks of
BDRV_REQUEST_MAX_SECTORS. Add input range checking to avoid overflow.

Fixes f313520 ("xen_disk: add discard support")

Signed-off-by: Olaf Hering
Reviewed-by: Eric Blake
Reviewed-by: Stefano Stabellini
---
 hw/block/xen_disk.c | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 3a7dc194e2..456a2d5694 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -660,6 +660,38 @@ static void qemu_aio_complete(void *opaque, int ret)
     qemu_bh_schedule(ioreq->blkdev->bh);
 }
 
+static bool blk_split_discard(struct ioreq *ioreq, blkif_sector_t sector_number,
+                              uint64_t nr_sectors)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+    int64_t byte_offset;
+    int byte_chunk;
+    uint64_t byte_remaining, limit;
+    uint64_t sec_start = sector_number;
+    uint64_t sec_count = nr_sectors;
+
+    /* Wrap around, or overflowing byte limit? */
+    if (sec_start + sec_count < sec_count ||
+        sec_start + sec_count > INT64_MAX >> BDRV_SECTOR_BITS) {
+        return false;
+    }
+
+    limit = BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS;
+    byte_offset = sec_start << BDRV_SECTOR_BITS;
+    byte_remaining = sec_count << BDRV_SECTOR_BITS;
+
+    do {
+        byte_chunk = byte_remaining > limit ? limit : byte_remaining;
+        ioreq->aio_inflight++;
+        blk_aio_pdiscard(blkdev->blk, byte_offset, byte_chunk,
+                         qemu_aio_complete, ioreq);
+        byte_remaining -= byte_chunk;
+        byte_offset += byte_chunk;
+    } while (byte_remaining > 0);
+
+    return true;
+}
+
 static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
@@ -708,12 +740,10 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
         break;
     case BLKIF_OP_DISCARD:
     {
-        struct blkif_request_discard *discard_req = (void *)&ioreq->req;
-        ioreq->aio_inflight++;
-        blk_aio_pdiscard(blkdev->blk,
-                         discard_req->sector_number << BDRV_SECTOR_BITS,
-                         discard_req->nr_sectors << BDRV_SECTOR_BITS,
-                         qemu_aio_complete, ioreq);
+        struct blkif_request_discard *req = (void *)&ioreq->req;
+        if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) {
+            goto err;
+        }
         break;
     }
     default:
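For illustration, the chunking arithmetic used by blk_split_discard() above
can be modelled as a standalone program. This is a minimal sketch, not part
of the patch: SECTOR_BITS and REQUEST_MAX_SECTORS are stand-ins mirroring
QEMU's BDRV_SECTOR_BITS (9) and BDRV_REQUEST_MAX_SECTORS, and the printf
stands in for blk_aio_pdiscard(), whose byte-count parameter was a plain int
at the time, which is why an unsplit multi-gigabyte discard would truncate.

/* Sketch of the discard-splitting arithmetic; constants and names are
 * illustrative stand-ins, not QEMU's real API. */
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_BITS 9
#define REQUEST_MAX_SECTORS ((uint64_t)(INT_MAX >> SECTOR_BITS))

static bool split_discard(uint64_t sec_start, uint64_t sec_count)
{
    /* Reject wrap-around and requests beyond the signed 64-bit byte range. */
    if (sec_start + sec_count < sec_count ||
        sec_start + sec_count > (uint64_t)(INT64_MAX >> SECTOR_BITS)) {
        return false;
    }

    uint64_t limit = REQUEST_MAX_SECTORS << SECTOR_BITS;
    int64_t byte_offset = sec_start << SECTOR_BITS;
    uint64_t byte_remaining = sec_count << SECTOR_BITS;

    do {
        /* Each chunk fits in an int, matching the discard API's prototype. */
        int byte_chunk = byte_remaining > limit ? limit : byte_remaining;
        /* Stand-in for blk_aio_pdiscard(blk, byte_offset, byte_chunk, ...). */
        printf("discard offset=%" PRId64 " bytes=%d\n", byte_offset, byte_chunk);
        byte_remaining -= byte_chunk;
        byte_offset += byte_chunk;
    } while (byte_remaining > 0);

    return true;
}

int main(void)
{
    /* A 4 GiB discard (8M sectors) splits into several sub-INT_MAX chunks. */
    assert(split_discard(0, 8 * 1024 * 1024));
    /* A request that wraps the 64-bit sector space is rejected. */
    assert(!split_discard(UINT64_MAX, 2));
    return 0;
}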
From ff3b8b8f864fa5721b196ca3dcc48853b0ee0275 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 25 Nov 2016 03:05:57 -0700
Subject: [PATCH 2/4] xen: fix quad word bufioreq handling

We should not consume the second slot if it didn't get written yet.
Normal writers - i.e. Xen - would not update write_pointer between the
two writes, but the page may get fiddled with by the guest itself, and
we're better off avoiding an infinite loop in that case.

Reported-by: yanghongke
Signed-off-by: Jan Beulich
Reviewed-by: Paul Durrant
Reviewed-by: Stefano Stabellini
Signed-off-by: Stefano Stabellini
---
 xen-hvm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/xen-hvm.c b/xen-hvm.c
index 99b8ee8a4f..d74e233054 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -1021,6 +1021,9 @@ static int handle_buffered_iopage(XenIOState *state)
         xen_rmb();
         qw = (req.size == 8);
         if (qw) {
+            if (rdptr + 1 == wrptr) {
+                hw_error("Incomplete quad word buffered ioreq");
+            }
             buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                            IOREQ_BUFFER_SLOT_NUM];
             req.data |= ((uint64_t)buf_req->data) << 32;

From f37f29d31488fe36354e59b2fdc4fae83b2cf763 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 25 Nov 2016 03:06:33 -0700
Subject: [PATCH 3/4] xen: slightly simplify bufioreq handling

There's no point setting fields that receive the same value on every
iteration, as handle_ioreq() doesn't alter them anyway. Set state and
count once ahead of the loop, drop the redundant clearing of
data_is_ptr, and avoid the meaningless (because count is 1) setting of
df altogether. Also avoid doing an unsigned long calculation of size
when the field to be initialized is only 32 bits wide (and the shift
value is in the range 0...3).

Signed-off-by: Jan Beulich
Reviewed-by: Paul Durrant
Reviewed-by: Stefano Stabellini
Signed-off-by: Stefano Stabellini
---
 xen-hvm.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/xen-hvm.c b/xen-hvm.c
index d74e233054..124ae10b5d 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -995,6 +995,8 @@ static int handle_buffered_iopage(XenIOState *state)
     }
 
     memset(&req, 0x00, sizeof(req));
+    req.state = STATE_IOREQ_READY;
+    req.count = 1;
 
     for (;;) {
         uint32_t rdptr = buf_page->read_pointer, wrptr;
@@ -1009,15 +1011,11 @@ static int handle_buffered_iopage(XenIOState *state)
             break;
         }
         buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
-        req.size = 1UL << buf_req->size;
-        req.count = 1;
+        req.size = 1U << buf_req->size;
         req.addr = buf_req->addr;
         req.data = buf_req->data;
-        req.state = STATE_IOREQ_READY;
         req.dir = buf_req->dir;
-        req.df = 1;
         req.type = buf_req->type;
-        req.data_is_ptr = 0;
         xen_rmb();
         qw = (req.size == 8);
         if (qw) {
@@ -1032,6 +1030,13 @@ static int handle_buffered_iopage(XenIOState *state)
 
         handle_ioreq(state, &req);
 
+        /* Only req.data may get updated by handle_ioreq(), albeit even that
+         * should not happen as such data would never make it to the guest.
+         */
+        assert(req.state == STATE_IOREQ_READY);
+        assert(req.count == 1);
+        assert(!req.data_is_ptr);
+
         atomic_add(&buf_page->read_pointer, qw + 1);
     }
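For illustration, here is a runnable toy model of the ring consumption after
the two patches above: a quad word request occupies two slots, and the
consumer must not read the second slot before the producer's write_pointer
covers it. All types and names are simplified stand-ins for the ones in
xen-hvm.c (e.g. SLOT_NUM for IOREQ_BUFFER_SLOT_NUM), and the model omits the
memory barriers the real code needs.

/* Toy model of buffered ioreq ring consumption; simplified stand-in types,
 * no barriers, not QEMU's real structures. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SLOT_NUM 80                      /* stands in for IOREQ_BUFFER_SLOT_NUM */

struct buf_ioreq { uint8_t size; uint32_t data; };

struct buf_page {
    uint32_t read_pointer;
    uint32_t write_pointer;
    struct buf_ioreq slots[SLOT_NUM];
};

/* Consume one (possibly quad word) request; returns the number of slots
 * consumed, 0 if the ring is empty, or -1 if a quad word request is still
 * missing its second half. */
static int consume_one(struct buf_page *p)
{
    uint32_t rdptr = p->read_pointer, wrptr = p->write_pointer;

    if (rdptr == wrptr) {
        return 0;                        /* ring empty */
    }

    struct buf_ioreq *req = &p->slots[rdptr % SLOT_NUM];
    uint64_t data = req->data;
    int qw = (1U << req->size) == 8;     /* size field is log2(bytes) */

    if (qw) {
        /* The fix from patch 2: the second half may not be written yet. */
        if (rdptr + 1 == wrptr) {
            return -1;
        }
        data |= (uint64_t)p->slots[(rdptr + 1) % SLOT_NUM].data << 32;
    }

    printf("handled data=%#llx\n", (unsigned long long)data);
    p->read_pointer += qw + 1;           /* consume one or two slots */
    return qw + 1;
}

int main(void)
{
    struct buf_page p = {0};

    /* Producer has published only the first half of a quad word request. */
    p.slots[0] = (struct buf_ioreq){ .size = 3, .data = 0xdeadbeef };
    p.write_pointer = 1;
    assert(consume_one(&p) == -1);       /* must not consume half a request */

    p.slots[1] = (struct buf_ioreq){ .size = 3, .data = 0xcafe };
    p.write_pointer = 2;
    assert(consume_one(&p) == 2);        /* both halves now visible */
    return 0;
}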
From e514379de52573131ccc47441787e5fab6dbfc08 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 25 Nov 2016 03:06:58 -0700
Subject: [PATCH 4/4] xen: ignore direction in bufioreq handling

There's no way to communicate back read data, so only writes can ever
be usefully specified. Ignore the field, paving the way for eventually
re-using the bit for something else in a few (many?) years time.

Signed-off-by: Jan Beulich
Reviewed-by: Paul Durrant
Acked-by: Stefano Stabellini
Signed-off-by: Stefano Stabellini
---
 xen-hvm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/xen-hvm.c b/xen-hvm.c
index 124ae10b5d..0892361cc2 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -997,6 +997,7 @@ static int handle_buffered_iopage(XenIOState *state)
     memset(&req, 0x00, sizeof(req));
     req.state = STATE_IOREQ_READY;
     req.count = 1;
+    req.dir = IOREQ_WRITE;
 
     for (;;) {
         uint32_t rdptr = buf_page->read_pointer, wrptr;
@@ -1014,7 +1015,6 @@ static int handle_buffered_iopage(XenIOState *state)
         req.size = 1U << buf_req->size;
         req.addr = buf_req->addr;
         req.data = buf_req->data;
-        req.dir = buf_req->dir;
         req.type = buf_req->type;
         xen_rmb();
         qw = (req.size == 8);
@@ -1031,10 +1031,12 @@ static int handle_buffered_iopage(XenIOState *state)
         handle_ioreq(state, &req);
 
         /* Only req.data may get updated by handle_ioreq(), albeit even that
-         * should not happen as such data would never make it to the guest.
+         * should not happen as such data would never make it to the guest (we
+         * can only usefully see writes here after all).
          */
         assert(req.state == STATE_IOREQ_READY);
         assert(req.count == 1);
+        assert(req.dir == IOREQ_WRITE);
         assert(!req.data_is_ptr);
 
         atomic_add(&buf_page->read_pointer, qw + 1);
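Taken together, the three xen-hvm.c patches leave handle_buffered_iopage()
setting the loop-invariant request fields once and asserting after each
dispatch that they were not disturbed. A minimal standalone sketch of that
shape, using simplified stand-in types and names rather than QEMU's:

/* Sketch of the post-series loop shape: invariants hoisted out of the loop
 * and re-checked after each dispatch. Illustrative stand-ins only. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { STATE_READY = 1, DIR_WRITE = 1 };

struct model_req {
    int state, count, dir, data_is_ptr;
    uint64_t addr, data;
};

/* Stand-in for handle_ioreq(); must leave the invariant fields alone. */
static void dispatch(struct model_req *req)
{
    printf("write addr=%#llx data=%#llx\n",
           (unsigned long long)req->addr, (unsigned long long)req->data);
}

int main(void)
{
    struct model_req req = {0};

    /* Hoisted out of the loop: these never change per iteration. */
    req.state = STATE_READY;
    req.count = 1;
    req.dir   = DIR_WRITE;               /* buffered ioreqs are write-only */

    for (uint64_t slot = 0; slot < 3; slot++) {
        req.addr = 0x1000 + slot * 8;
        req.data = slot;
        dispatch(&req);

        /* dispatch() must not have disturbed the invariants. */
        assert(req.state == STATE_READY);
        assert(req.count == 1);
        assert(req.dir == DIR_WRITE);
        assert(!req.data_is_ptr);
    }
    return 0;
}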