From 64ebf556486af39024cc9e7a8c88303a7b37c465 Mon Sep 17 00:00:00 2001
From: Eric Blake
Date: Mon, 5 Jun 2017 15:38:42 -0500
Subject: [PATCH 01/40] qemu-io: Don't die on second open

Most callback commands in qemu-io return 0 to keep the interpreter loop
running, or 1 to quit immediately.  However, open_f() just passed through
the return value of openfile(), which has different semantics of returning
0 if a file was opened, or 1 on any failure.  As a result of mixing the
return semantics, we are forcing the qemu-io interpreter to exit early on
any failures, which is rather annoying when some of the failures are
obviously trying to give the user a hint of how to proceed (if we didn't
then kill qemu-io out from under the user's feet):

$ qemu-io
qemu-io> open foo
qemu-io> open foo
file open already, try 'help close'
$ echo $?
0

In general, we WANT openfile() to report failures, since it is the
function used in the form 'qemu-io -c "$something" no_such_file' for
performing one or more -c options on a single file, and it is not worth
attempting $something if the file itself cannot be opened.  So the
solution is to fix open_f() to always return 0 (when we are in
interactive mode, even failure to open should not end the session), and
save the return value of openfile() for command line use in main().

Note, however, that we do have some qemu-iotests that do
'qemu-io -c "open file" -c "$something"'; such tests will now proceed to
attempt $something whether or not the open succeeded, the same way as if
the two commands had been attempted in interactive mode.  As such, the
expected output for those tests has to be modified.  But it also means
that it is now possible to use -c close and have a single qemu-io command
line operate on more than one file even without using interactive mode.
Although the '-c open' action is a subtle change in behavior, remember
that qemu-io is for debugging purposes, so as long as it serves the needs
of qemu-iotests while still being reasonable for interactive use, it
should not be a problem that we are changing tests to the new behavior.

This has been awkward since at least as far back as commit e3aff4f, in
2009.
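As an illustration of the convention described above, the following
self-contained sketch shows an interpreter loop in which every command
callback returns 0 to keep the session alive and only 'quit' returns 1;
the toy dispatcher and all names in it are invented for the example and
are not qemu-io source:

    #include <stdio.h>
    #include <string.h>

    static int have_file;   /* 0 means no file is currently open */

    static int open_f(const char *arg)
    {
        if (have_file) {
            printf("file open already, try 'help close'\n");
            return 0;       /* report the problem, but keep the session */
        }
        if (!arg[0]) {
            printf("usage: open <file>\n");
            return 0;
        }
        have_file = 1;
        printf("opened %s\n", arg);
        return 0;
    }

    static int quit_f(const char *arg)
    {
        (void)arg;
        return 1;           /* only quit (or EOF) ends the loop */
    }

    int main(void)
    {
        char line[256];

        while (fgets(line, sizeof(line), stdin)) {
            char cmd[16] = "", arg[240] = "";
            int done = 0;

            sscanf(line, "%15s %239s", cmd, arg);
            if (!strcmp(cmd, "open")) {
                done = open_f(arg);
            } else if (!strcmp(cmd, "quit")) {
                done = quit_f(arg);
            } else if (cmd[0]) {
                printf("unknown command '%s'\n", cmd);
            }
            if (done) {
                break;      /* a return of 1 quits immediately */
            }
        }
        return 0;
    }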
Signed-off-by: Eric Blake Reviewed-by: Fam Zheng Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- qemu-io.c | 7 ++++--- tests/qemu-iotests/060.out | 1 + tests/qemu-iotests/114.out | 5 +++-- tests/qemu-iotests/153.out | 6 ++++++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 8e38b288b7..8074656b7c 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -230,13 +230,14 @@ static int open_f(BlockBackend *blk, int argc, char **argv) qemu_opts_reset(&empty_opts); if (optind == argc - 1) { - return openfile(argv[optind], flags, writethrough, force_share, opts); + openfile(argv[optind], flags, writethrough, force_share, opts); } else if (optind == argc) { - return openfile(NULL, flags, writethrough, force_share, opts); + openfile(NULL, flags, writethrough, force_share, opts); } else { QDECREF(opts); - return qemuio_command_usage(&open_cmd); + qemuio_command_usage(&open_cmd); } + return 0; } static int quit_f(BlockBackend *blk, int argc, char **argv) diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 3bc14616be..5ca3af491f 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -21,6 +21,7 @@ Format specific information: refcount bits: 16 corrupt: true can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write +no file open, try 'help open' read 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/114.out b/tests/qemu-iotests/114.out index b6d10e4804..1a47a526b9 100644 --- a/tests/qemu-iotests/114.out +++ b/tests/qemu-iotests/114.out @@ -1,6 +1,6 @@ QA output created by 114 -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 64M (67108864 bytes) @@ -8,6 +8,7 @@ cluster_size: 65536 backing file: TEST_DIR/t.IMGFMT.base backing file format: foo can't open device TEST_DIR/t.qcow2: Could not open backing file: Unknown driver 'foo' +no file open, try 'help open' read 4096/4096 bytes at offset 0 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out index 5ba0b63867..5b917b177c 100644 --- a/tests/qemu-iotests/153.out +++ b/tests/qemu-iotests/153.out @@ -33,10 +33,12 @@ Is another process using the image? _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock Is another process using the image? +no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock Is another process using the image? 
+no file open, try 'help open' _qemu_img_wrapper info TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock @@ -99,6 +101,7 @@ _qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images +no file open, try 'help open' _qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 @@ -166,6 +169,7 @@ _qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock Is another process using the image? +no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 @@ -214,6 +218,7 @@ _qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images +no file open, try 'help open' _qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 @@ -313,6 +318,7 @@ _qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images +no file open, try 'help open' _qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512
From 81c219ac6ce0d6182e35f3976f2caa4cefcaf9f0 Mon Sep 17 00:00:00 2001
From: Eric Blake
Date: Mon, 5 Jun 2017 15:38:43 -0500
Subject: [PATCH 02/40] block: Guarantee that *file is set on bdrv_get_block_status()

We document that *file is valid if the return is not an error and
includes BDRV_BLOCK_OFFSET_VALID, but forgot to obey this contract when
a driver (such as blkdebug) lacks a callback.  Messed up in commit
67a0fd2 (v2.6), when we added the file parameter.

Enhance qemu-iotest 177 to cover this, using a sequence that would
print garbage or even SEGV, because it was dereferencing through
uninitialized memory.  [The resulting test output shows that we have
less-than-ideal block status from the blkdebug driver, but that's a
separate fix coming up soon.]

Setting *file on all paths that return BDRV_BLOCK_OFFSET_VALID is
enough to fix the crash, but we can go one step further: always setting
*file, even on error, means that a broken caller that blindly
dereferences file without checking for error is now more likely to get
a reliable SEGV instead of randomly acting on garbage, making it easier
to diagnose such buggy callers.  Adding an assertion that file is set
where expected doesn't hurt either.
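The contract being tightened here can be modelled with a self-contained
sketch (the SK_* names and flag values below are invented stand-ins, not
the real BDRV_BLOCK_* constants): the callee clears *file up front, and a
well-behaved caller only dereferences *file after checking both for an
error and for the OFFSET_VALID bit:

    #include <stdint.h>
    #include <stdio.h>

    #define SK_BLOCK_OFFSET_VALID 0x2   /* stand-in for BDRV_BLOCK_OFFSET_VALID */

    typedef struct SkState { const char *name; } SkState;

    /* Model of the rule: *file is only meaningful when the return value is
     * non-negative and contains the OFFSET_VALID bit; clearing *file first
     * (as the patch does) makes broken callers fault reliably. */
    static int64_t sk_get_block_status(SkState *bs, int64_t sector, SkState **file)
    {
        *file = NULL;                        /* always initialized */
        if (sector < 0) {
            return -1;                       /* error: *file stays NULL */
        }
        *file = bs;
        return SK_BLOCK_OFFSET_VALID | (sector << 9);
    }

    int main(void)
    {
        SkState bs = { "protocol layer" };
        SkState *file;
        int64_t ret = sk_get_block_status(&bs, 4, &file);

        if (ret >= 0 && (ret & SK_BLOCK_OFFSET_VALID)) {
            /* only now is *file guaranteed to be valid */
            printf("offset valid, answered by '%s'\n", file->name);
        }
        return 0;
    }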
CC: qemu-stable@nongnu.org Signed-off-by: Eric Blake Reviewed-by: Fam Zheng Reviewed-by: Max Reitz Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- block/io.c | 5 +++-- tests/qemu-iotests/177 | 3 +++ tests/qemu-iotests/177.out | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/block/io.c b/block/io.c index 2de7c77983..5c146b5a10 100644 --- a/block/io.c +++ b/block/io.c @@ -1734,6 +1734,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, int64_t n; int64_t ret, ret2; + *file = NULL; total_sectors = bdrv_nb_sectors(bs); if (total_sectors < 0) { return total_sectors; @@ -1757,11 +1758,11 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } if (bs->drv->protocol_name) { ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); + *file = bs; } return ret; } - *file = NULL; bdrv_inc_in_flight(bs); ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum, file); @@ -1771,7 +1772,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } if (ret & BDRV_BLOCK_RAW) { - assert(ret & BDRV_BLOCK_OFFSET_VALID); + assert(ret & BDRV_BLOCK_OFFSET_VALID && *file); ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS, *pnum, pnum, file); goto out; diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177 index 2005c174f2..f8ed8fb86b 100755 --- a/tests/qemu-iotests/177 +++ b/tests/qemu-iotests/177 @@ -43,6 +43,7 @@ _supported_proto file CLUSTER_SIZE=1M size=128M options=driver=blkdebug,image.driver=qcow2 +nested_opts=image.file.driver=file,image.file.filename=$TEST_IMG echo echo "== setting up files ==" @@ -106,6 +107,8 @@ function verify_io() } verify_io | $QEMU_IO -r "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --image-opts "$options,$nested_opts,align=4k" \ + | _filter_qemu_img_map _check_test_img diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out index e887542678..fcfbfa3b83 100644 --- a/tests/qemu-iotests/177.out +++ b/tests/qemu-iotests/177.out @@ -45,5 +45,7 @@ read 30408704/30408704 bytes at offset 80740352 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 23068672/23068672 bytes at offset 111149056 22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x8000000 json:{"image": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}}, "driver": "blkdebug", "align": "4k"} No errors were found on the image. *** done From d5254033daf524fb2eee862eb8377b9ddc81000a Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 5 Jun 2017 15:38:44 -0500 Subject: [PATCH 03/40] block: Simplify use of BDRV_BLOCK_RAW The lone caller that cares about a return of BDRV_BLOCK_RAW (namely, io.c:bdrv_co_get_block_status) completely replaces the return value, so there is no point in passing BDRV_BLOCK_DATA. 
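The BDRV_BLOCK_RAW round trip can be modelled as follows; this is an
invented, self-contained illustration (SK_* names and values are
stand-ins, not the QEMU block layer): the passthrough layer answers
RAW|OFFSET_VALID and points at its child, and the generic code discards
that answer and recomputes it from the child, which is why also setting
a DATA bit buys nothing:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the real BDRV_BLOCK_* flags; values are illustrative. */
    #define SK_BLOCK_DATA          0x1
    #define SK_BLOCK_OFFSET_VALID  0x2
    #define SK_BLOCK_RAW           0x4
    #define SK_SECTOR_BITS         9

    typedef struct SkBDS {
        struct SkBDS *file;             /* wrapped child, if any */
        int64_t data_end;               /* sectors of real data in this layer */
    } SkBDS;

    /* A passthrough layer answers RAW|OFFSET_VALID and points at its child;
     * any DATA bit it set would be discarded anyway. */
    static int64_t sk_driver_status(SkBDS *bs, int64_t sector, SkBDS **file)
    {
        if (bs->file) {
            *file = bs->file;
            return SK_BLOCK_RAW | SK_BLOCK_OFFSET_VALID |
                   (sector << SK_SECTOR_BITS);
        }
        *file = bs;
        int64_t ret = SK_BLOCK_OFFSET_VALID | (sector << SK_SECTOR_BITS);
        if (sector < bs->data_end) {
            ret |= SK_BLOCK_DATA;
        }
        return ret;
    }

    /* The generic layer recomputes the whole answer from *file when it
     * sees RAW, mirroring what bdrv_co_get_block_status() does. */
    static int64_t sk_get_block_status(SkBDS *bs, int64_t sector, SkBDS **file)
    {
        int64_t ret = sk_driver_status(bs, sector, file);
        if (ret & SK_BLOCK_RAW) {
            assert((ret & SK_BLOCK_OFFSET_VALID) && *file);
            ret = sk_get_block_status(*file, ret >> SK_SECTOR_BITS, file);
        }
        return ret;
    }

    int main(void)
    {
        SkBDS base = { NULL, 8 };
        SkBDS filter = { &base, 0 };
        SkBDS *file;
        int64_t ret = sk_get_block_status(&filter, 4, &file);

        printf("data=%d, answered by base=%d\n",
               !!(ret & SK_BLOCK_DATA), file == &base);
        return 0;
    }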
Signed-off-by: Eric Blake Reviewed-by: Fam Zheng Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- block/mirror.c | 2 +- block/raw-format.c | 2 +- block/vpc.c | 2 +- include/block/block.h | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/commit.c b/block/commit.c index 8c09c3dbcd..524bd54946 100644 --- a/block/commit.c +++ b/block/commit.c @@ -253,7 +253,7 @@ static int64_t coroutine_fn bdrv_commit_top_get_block_status( { *pnum = nb_sectors; *file = bs->backing->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | (sector_num << BDRV_SECTOR_BITS); } diff --git a/block/mirror.c b/block/mirror.c index 68744a17e8..61a862dcf3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1058,7 +1058,7 @@ static int64_t coroutine_fn bdrv_mirror_top_get_block_status( { *pnum = nb_sectors; *file = bs->backing->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | (sector_num << BDRV_SECTOR_BITS); } diff --git a/block/raw-format.c b/block/raw-format.c index 0d185fe41b..1ea8c2d7ff 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -259,7 +259,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = nb_sectors; *file = bs->file->bs; sector_num += s->offset / BDRV_SECTOR_SIZE; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | (sector_num << BDRV_SECTOR_BITS); } diff --git a/block/vpc.c b/block/vpc.c index 4240ba9d1c..b313c68148 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -701,7 +701,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, if (be32_to_cpu(footer->type) == VHD_FIXED) { *pnum = nb_sectors; *file = bs->file->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | (sector_num << BDRV_SECTOR_BITS); } diff --git a/include/block/block.h b/include/block/block.h index 4c149adb17..afe1b61524 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -132,9 +132,9 @@ typedef struct HDGeometry { * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer * * Internal flag: - * BDRV_BLOCK_RAW: used internally to indicate that the request was - * answered by a passthrough driver such as raw and that the - * block layer should recompute the answer from bs->file. + * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request + * that the block layer recompute the answer from the returned + * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. * * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) * represent the offset in the returned BDS that is allocated for the From 544daf6679cef4c0fac8b4a07deef305021a22c1 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 5 Jun 2017 15:38:45 -0500 Subject: [PATCH 04/40] blkdebug: Support .bdrv_co_get_block_status Without a passthrough status of BDRV_BLOCK_RAW, anything wrapped by blkdebug appears 100% allocated as data. Better is treating it the same as the underlying file being wrapped. Update iotest 177 for the new expected output. 
Signed-off-by: Eric Blake Reviewed-by: Fam Zheng Reviewed-by: Max Reitz Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- block/blkdebug.c | 11 +++++++++++ tests/qemu-iotests/177.out | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index a1b24b9b0d..b25856c49c 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -641,6 +641,16 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } +static int64_t coroutine_fn blkdebug_co_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->file->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | + (sector_num << BDRV_SECTOR_BITS); +} + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -915,6 +925,7 @@ static BlockDriver bdrv_blkdebug = { .bdrv_co_flush_to_disk = blkdebug_co_flush, .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, .bdrv_co_pdiscard = blkdebug_co_pdiscard, + .bdrv_co_get_block_status = blkdebug_co_get_block_status, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out index fcfbfa3b83..43a777836c 100644 --- a/tests/qemu-iotests/177.out +++ b/tests/qemu-iotests/177.out @@ -46,6 +46,9 @@ read 30408704/30408704 bytes at offset 80740352 read 23068672/23068672 bytes at offset 111149056 22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File -0 0x8000000 json:{"image": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}}, "driver": "blkdebug", "align": "4k"} +0 0x800000 TEST_DIR/t.IMGFMT +0x900000 0x2400000 TEST_DIR/t.IMGFMT +0x3c00000 0x1100000 TEST_DIR/t.IMGFMT +0x6a00000 0x1600000 TEST_DIR/t.IMGFMT No errors were found on the image. *** done From 139921aaa77c435104308ad53b631a00c3b65ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:53 +0200 Subject: [PATCH 05/40] vvfat: fix qemu-img map and qemu-img convert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bs->total_sectors is the number of sectors of the whole disk - s->sector_count is the number of sectors of the FAT partition This fixes the following assert in qemu-img map: qemu-img.c:2641: get_block_status: Assertion `nb_sectors' failed. This also fixes an infinite loop in qemu-img convert. 
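To see why the clamp must use the whole-disk size, here is a
self-contained sketch with invented sizes (not vvfat code) of the
pattern qemu-img effectively relies on: the caller must be able to make
forward progress for every sector of the disk, so computing the
remaining length from the FAT partition alone would yield n <= 0 past
the partition end and either trip the assertion or loop forever:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative numbers only: a 64-sector disk whose FAT partition
     * covers the first 48 sectors. */
    #define SK_TOTAL_SECTORS 64
    #define SK_FAT_SECTORS   48

    /* Correct behaviour: clamp against the whole disk, so *n stays > 0
     * for every sector the caller may ask about. */
    static void sk_block_status(int64_t sector, int nb_sectors, int *n)
    {
        *n = (int)(SK_TOTAL_SECTORS - sector);   /* whole disk, not FAT part */
        if (*n > nb_sectors) {
            *n = nb_sectors;
        }
    }

    int main(void)
    {
        int64_t sector = 0;

        while (sector < SK_TOTAL_SECTORS) {
            int n;

            sk_block_status(sector, 16, &n);
            assert(n > 0);          /* what qemu-img effectively relies on */
            sector += n;            /* n <= 0 would mean no progress: hang */
        }
        printf("walked %d sectors\n", (int)sector);
        return 0;
    }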
Fixes: 4480e0f924a42e1db8b8cfcac4d0634dd1bb27a0 Fixes: https://bugs.launchpad.net/qemu/+bug/1599539 Cc: qemu-stable@nongnu.org Signed-off-by: HervĂ© Poussineau Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/vvfat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 8ab647c0c6..040fb713ec 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2968,8 +2968,7 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) { - BDRVVVFATState* s = bs->opaque; - *n = s->sector_count - sector_num; + *n = bs->total_sectors - sector_num; if (*n > nb_sectors) { *n = nb_sectors; } else if (*n < 0) { From d6a7e54ed3c53ab05e364fdf863becebb90a7111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:54 +0200 Subject: [PATCH 06/40] vvfat: replace tabs by 8 spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was a complete mess. On 2299 indented lines: - 1329 were with spaces only - 617 with tabulations only - 353 with spaces and tabulations Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 1940 ++++++++++++++++++++++++------------------------- 1 file changed, 970 insertions(+), 970 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 040fb713ec..e83b8bab5d 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -100,12 +100,12 @@ static inline void* array_get(array_t* array,unsigned int index) { static inline int array_ensure_allocated(array_t* array, int index) { if((index + 1) * array->item_size > array->size) { - int new_size = (index + 32) * array->item_size; - array->pointer = g_realloc(array->pointer, new_size); - if (!array->pointer) - return -1; - array->size = new_size; - array->next = index + 1; + int new_size = (index + 32) * array->item_size; + array->pointer = g_realloc(array->pointer, new_size); + if (!array->pointer) + return -1; + array->size = new_size; + array->next = index + 1; } return 0; @@ -115,7 +115,7 @@ static inline void* array_get_next(array_t* array) { unsigned int next = array->next; if (array_ensure_allocated(array, next) < 0) - return NULL; + return NULL; array->next = next + 1; return array_get(array, next); @@ -123,15 +123,15 @@ static inline void* array_get_next(array_t* array) { static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) { if((array->next+count)*array->item_size>array->size) { - int increment=count*array->item_size; - array->pointer=g_realloc(array->pointer,array->size+increment); - if(!array->pointer) + int increment=count*array->item_size; + array->pointer=g_realloc(array->pointer,array->size+increment); + if(!array->pointer) return NULL; - array->size+=increment; + array->size+=increment; } memmove(array->pointer+(index+count)*array->item_size, - array->pointer+index*array->item_size, - (array->next-index)*array->item_size); + array->pointer+index*array->item_size, + (array->next-index)*array->item_size); array->next+=count; return array->pointer+index*array->item_size; } @@ -146,12 +146,12 @@ static inline int array_roll(array_t* array,int index_to,int index_from,int coun int is; if(!array || - index_to<0 || index_to>=array->next || - index_from<0 || index_from>=array->next) - return -1; + index_to<0 || index_to>=array->next || + index_from<0 || index_from>=array->next) + return -1; 
if(index_to==index_from) - return 0; + return 0; is=array->item_size; from=array->pointer+index_from*is; @@ -160,9 +160,9 @@ static inline int array_roll(array_t* array,int index_to,int index_from,int coun memcpy(buf,from,is*count); if(index_to 0); assert(index + count <= array->next); if(array_roll(array,array->next-1,index,count)) - return -1; + return -1; array->next -= count; return 0; } @@ -216,21 +216,21 @@ typedef struct bootsector_t { uint32_t total_sectors; union { struct { - uint8_t drive_number; - uint8_t current_head; - uint8_t signature; - uint32_t id; - uint8_t volume_label[11]; - } QEMU_PACKED fat16; - struct { - uint32_t sectors_per_fat; - uint16_t flags; - uint8_t major,minor; - uint32_t first_cluster_of_root_directory; - uint16_t info_sector; - uint16_t backup_boot_sector; - uint16_t ignored; - } QEMU_PACKED fat32; + uint8_t drive_number; + uint8_t current_head; + uint8_t signature; + uint32_t id; + uint8_t volume_label[11]; + } QEMU_PACKED fat16; + struct { + uint32_t sectors_per_fat; + uint16_t flags; + uint8_t major,minor; + uint32_t first_cluster_of_root_directory; + uint16_t info_sector; + uint16_t backup_boot_sector; + uint16_t ignored; + } QEMU_PACKED fat32; } u; uint8_t fat_type[8]; uint8_t ignored[0x1c0]; @@ -284,25 +284,25 @@ typedef struct mapping_t { /* the clusters of a file may be in any order; this points to the first */ int first_mapping_index; union { - /* offset is - * - the offset in the file (in clusters) for a file, or - * - the next cluster of the directory for a directory, and - * - the address of the buffer for a faked entry - */ - struct { - uint32_t offset; - } file; - struct { - int parent_mapping_index; - int first_dir_index; - } dir; + /* offset is + * - the offset in the file (in clusters) for a file, or + * - the next cluster of the directory for a directory, and + * - the address of the buffer for a faked entry + */ + struct { + uint32_t offset; + } file; + struct { + int parent_mapping_index; + int first_dir_index; + } dir; } info; /* path contains the full path, i.e. 
it always starts with s->path */ char* path; enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2, - MODE_DIRECTORY = 4, MODE_FAKED = 8, - MODE_DELETED = 16, MODE_RENAMED = 32 } mode; + MODE_DIRECTORY = 4, MODE_FAKED = 8, + MODE_DELETED = 16, MODE_RENAMED = 32 } mode; int read_only; } mapping_t; @@ -419,12 +419,12 @@ static inline int short2long_name(char* dest,const char* src) int len; for(i=0;i<129 && src[i];i++) { dest[2*i]=src[i]; - dest[2*i+1]=0; + dest[2*i+1]=0; } len=2*i; dest[2*i]=dest[2*i+1]=0; for(i=2*i+2;(i%26);i++) - dest[i]=0xff; + dest[i]=0xff; return len; } @@ -436,19 +436,19 @@ static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* fil direntry_t* entry; for(i=0;idirectory)); - entry->attributes=0xf; - entry->reserved[0]=0; - entry->begin=0; - entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); + entry=array_get_next(&(s->directory)); + entry->attributes=0xf; + entry->reserved[0]=0; + entry->begin=0; + entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); } for(i=0;i<26*number_of_entries;i++) { - int offset=(i%26); - if(offset<10) offset=1+offset; - else if(offset<22) offset=14+offset-10; - else offset=28+offset-22; - entry=array_get(&(s->directory),s->directory.next-1-(i/26)); - entry->name[offset]=buffer[i]; + int offset=(i%26); + if(offset<10) offset=1+offset; + else if(offset<22) offset=14+offset-10; + else offset=28+offset-22; + entry=array_get(&(s->directory),s->directory.next-1-(i/26)); + entry->name[offset]=buffer[i]; } return array_get(&(s->directory),s->directory.next-number_of_entries); } @@ -471,7 +471,7 @@ static char is_long_name(const direntry_t* direntry) static char is_short_name(const direntry_t* direntry) { return !is_volume_label(direntry) && !is_long_name(direntry) - && !is_free(direntry); + && !is_free(direntry); } static char is_directory(const direntry_t* direntry) @@ -527,73 +527,73 @@ static uint16_t fat_datetime(time_t time,int return_time) { t = &t1; localtime_r(&time,t); if(return_time) - return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11)); + return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11)); return cpu_to_le16((t->tm_mday)|((t->tm_mon+1)<<5)|((t->tm_year-80)<<9)); } static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value) { if(s->fat_type==32) { - uint32_t* entry=array_get(&(s->fat),cluster); - *entry=cpu_to_le32(value); + uint32_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le32(value); } else if(s->fat_type==16) { - uint16_t* entry=array_get(&(s->fat),cluster); - *entry=cpu_to_le16(value&0xffff); + uint16_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le16(value&0xffff); } else { - int offset = (cluster*3/2); - unsigned char* p = array_get(&(s->fat), offset); + int offset = (cluster*3/2); + unsigned char* p = array_get(&(s->fat), offset); switch (cluster&1) { - case 0: - p[0] = value&0xff; - p[1] = (p[1]&0xf0) | ((value>>8)&0xf); - break; - case 1: - p[0] = (p[0]&0xf) | ((value&0xf)<<4); - p[1] = (value>>4); - break; - } + case 0: + p[0] = value&0xff; + p[1] = (p[1]&0xf0) | ((value>>8)&0xf); + break; + case 1: + p[0] = (p[0]&0xf) | ((value&0xf)<<4); + p[1] = (value>>4); + break; + } } } static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster) { if(s->fat_type==32) { - uint32_t* entry=array_get(&(s->fat),cluster); - return le32_to_cpu(*entry); + uint32_t* entry=array_get(&(s->fat),cluster); + return le32_to_cpu(*entry); } else if(s->fat_type==16) { - uint16_t* entry=array_get(&(s->fat),cluster); - return le16_to_cpu(*entry); + 
uint16_t* entry=array_get(&(s->fat),cluster); + return le16_to_cpu(*entry); } else { - const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2; - return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; + const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2; + return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; } } static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry) { if(fat_entry>s->max_fat_value-8) - return -1; + return -1; return 0; } static inline void init_fat(BDRVVVFATState* s) { if (s->fat_type == 12) { - array_init(&(s->fat),1); - array_ensure_allocated(&(s->fat), - s->sectors_per_fat * 0x200 * 3 / 2 - 1); + array_init(&(s->fat),1); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 * 3 / 2 - 1); } else { - array_init(&(s->fat),(s->fat_type==32?4:2)); - array_ensure_allocated(&(s->fat), - s->sectors_per_fat * 0x200 / s->fat.item_size - 1); + array_init(&(s->fat),(s->fat_type==32?4:2)); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 / s->fat.item_size - 1); } memset(s->fat.pointer,0,s->fat.size); switch(s->fat_type) { - case 12: s->max_fat_value=0xfff; break; - case 16: s->max_fat_value=0xffff; break; - case 32: s->max_fat_value=0x0fffffff; break; - default: s->max_fat_value=0; /* error... */ + case 12: s->max_fat_value=0xfff; break; + case 16: s->max_fat_value=0xffff; break; + case 32: s->max_fat_value=0x0fffffff; break; + default: s->max_fat_value=0; /* error... */ } } @@ -601,17 +601,17 @@ static inline void init_fat(BDRVVVFATState* s) /* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */ /* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! */ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, - unsigned int directory_start, const char* filename, int is_dot) + unsigned int directory_start, const char* filename, int is_dot) { int i,j,long_index=s->directory.next; direntry_t* entry = NULL; direntry_t* entry_long = NULL; if(is_dot) { - entry=array_get_next(&(s->directory)); + entry=array_get_next(&(s->directory)); memset(entry->name, 0x20, sizeof(entry->name)); - memcpy(entry->name,filename,strlen(filename)); - return entry; + memcpy(entry->name,filename,strlen(filename)); + return entry; } entry_long=create_long_filename(s,filename); @@ -619,9 +619,9 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, i = strlen(filename); for(j = i - 1; j>0 && filename[j]!='.';j--); if (j > 0) - i = (j > 8 ? 8 : j); + i = (j > 8 ? 
8 : j); else if (i > 8) - i = 8; + i = 8; entry=array_get_next(&(s->directory)); memset(entry->name, 0x20, sizeof(entry->name)); @@ -635,53 +635,53 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, /* upcase & remove unwanted characters */ for(i=10;i>=0;i--) { - if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); - if(entry->name[i]<=' ' || entry->name[i]>0x7f - || strchr(".*?<>|\":/\\[];,+='",entry->name[i])) - entry->name[i]='_'; + if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); + if(entry->name[i]<=' ' || entry->name[i]>0x7f + || strchr(".*?<>|\":/\\[];,+='",entry->name[i])) + entry->name[i]='_'; else if(entry->name[i]>='a' && entry->name[i]<='z') entry->name[i]+='A'-'a'; } /* mangle duplicates */ while(1) { - direntry_t* entry1=array_get(&(s->directory),directory_start); - int j; + direntry_t* entry1=array_get(&(s->directory),directory_start); + int j; - for(;entry1name,entry->name,11)) - break; /* found dupe */ - if(entry1==entry) /* no dupe found */ - break; + for(;entry1name,entry->name,11)) + break; /* found dupe */ + if(entry1==entry) /* no dupe found */ + break; - /* use all 8 characters of name */ - if(entry->name[7]==' ') { - int j; - for(j=6;j>0 && entry->name[j]==' ';j--) - entry->name[j]='~'; - } + /* use all 8 characters of name */ + if(entry->name[7]==' ') { + int j; + for(j=6;j>0 && entry->name[j]==' ';j--) + entry->name[j]='~'; + } - /* increment number */ - for(j=7;j>0 && entry->name[j]=='9';j--) - entry->name[j]='0'; - if(j>0) { - if(entry->name[j]<'0' || entry->name[j]>'9') - entry->name[j]='0'; - else - entry->name[j]++; - } + /* increment number */ + for(j=7;j>0 && entry->name[j]=='9';j--) + entry->name[j]='0'; + if(j>0) { + if(entry->name[j]<'0' || entry->name[j]>'9') + entry->name[j]='0'; + else + entry->name[j]++; + } } /* calculate checksum; propagate to long name */ if(entry_long) { uint8_t chksum=fat_chksum(entry); - /* calculate anew, because realloc could have taken place */ - entry_long=array_get(&(s->directory),long_index); - while(entry_longreserved[1]=chksum; - entry_long++; - } + /* calculate anew, because realloc could have taken place */ + entry_long=array_get(&(s->directory),long_index); + while(entry_longreserved[1]=chksum; + entry_long++; + } } return entry; @@ -708,80 +708,80 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) assert(mapping->mode & MODE_DIRECTORY); if(!dir) { - mapping->end = mapping->begin; - return -1; + mapping->end = mapping->begin; + return -1; } i = mapping->info.dir.first_dir_index = - first_cluster == 0 ? 0 : s->directory.next; + first_cluster == 0 ? 
0 : s->directory.next; /* actually read the directory, and allocate the mappings */ while((entry=readdir(dir))) { - unsigned int length=strlen(dirname)+2+strlen(entry->d_name); + unsigned int length=strlen(dirname)+2+strlen(entry->d_name); char* buffer; - direntry_t* direntry; + direntry_t* direntry; struct stat st; - int is_dot=!strcmp(entry->d_name,"."); - int is_dotdot=!strcmp(entry->d_name,".."); + int is_dot=!strcmp(entry->d_name,"."); + int is_dotdot=!strcmp(entry->d_name,".."); - if(first_cluster == 0 && (is_dotdot || is_dot)) - continue; + if(first_cluster == 0 && (is_dotdot || is_dot)) + continue; - buffer = g_malloc(length); - snprintf(buffer,length,"%s/%s",dirname,entry->d_name); + buffer = g_malloc(length); + snprintf(buffer,length,"%s/%s",dirname,entry->d_name); - if(stat(buffer,&st)<0) { + if(stat(buffer,&st)<0) { g_free(buffer); continue; - } + } - /* create directory entry for this file */ - direntry=create_short_and_long_name(s, i, entry->d_name, - is_dot || is_dotdot); - direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); - direntry->reserved[0]=direntry->reserved[1]=0; - direntry->ctime=fat_datetime(st.st_ctime,1); - direntry->cdate=fat_datetime(st.st_ctime,0); - direntry->adate=fat_datetime(st.st_atime,0); - direntry->begin_hi=0; - direntry->mtime=fat_datetime(st.st_mtime,1); - direntry->mdate=fat_datetime(st.st_mtime,0); - if(is_dotdot) - set_begin_of_direntry(direntry, first_cluster_of_parent); - else if(is_dot) - set_begin_of_direntry(direntry, first_cluster); - else - direntry->begin=0; /* do that later */ + /* create directory entry for this file */ + direntry=create_short_and_long_name(s, i, entry->d_name, + is_dot || is_dotdot); + direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); + direntry->reserved[0]=direntry->reserved[1]=0; + direntry->ctime=fat_datetime(st.st_ctime,1); + direntry->cdate=fat_datetime(st.st_ctime,0); + direntry->adate=fat_datetime(st.st_atime,0); + direntry->begin_hi=0; + direntry->mtime=fat_datetime(st.st_mtime,1); + direntry->mdate=fat_datetime(st.st_mtime,0); + if(is_dotdot) + set_begin_of_direntry(direntry, first_cluster_of_parent); + else if(is_dot) + set_begin_of_direntry(direntry, first_cluster); + else + direntry->begin=0; /* do that later */ if (st.st_size > 0x7fffffff) { - fprintf(stderr, "File %s is larger than 2GB\n", buffer); + fprintf(stderr, "File %s is larger than 2GB\n", buffer); g_free(buffer); closedir(dir); - return -2; + return -2; } - direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size); + direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size); - /* create mapping for this file */ - if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) { - s->current_mapping = array_get_next(&(s->mapping)); - s->current_mapping->begin=0; - s->current_mapping->end=st.st_size; - /* - * we get the direntry of the most recent direntry, which - * contains the short name and all the relevant information. 
- */ - s->current_mapping->dir_index=s->directory.next-1; - s->current_mapping->first_mapping_index = -1; - if (S_ISDIR(st.st_mode)) { - s->current_mapping->mode = MODE_DIRECTORY; - s->current_mapping->info.dir.parent_mapping_index = - mapping_index; - } else { - s->current_mapping->mode = MODE_UNDEFINED; - s->current_mapping->info.file.offset = 0; - } - s->current_mapping->path=buffer; - s->current_mapping->read_only = - (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0; + /* create mapping for this file */ + if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) { + s->current_mapping = array_get_next(&(s->mapping)); + s->current_mapping->begin=0; + s->current_mapping->end=st.st_size; + /* + * we get the direntry of the most recent direntry, which + * contains the short name and all the relevant information. + */ + s->current_mapping->dir_index=s->directory.next-1; + s->current_mapping->first_mapping_index = -1; + if (S_ISDIR(st.st_mode)) { + s->current_mapping->mode = MODE_DIRECTORY; + s->current_mapping->info.dir.parent_mapping_index = + mapping_index; + } else { + s->current_mapping->mode = MODE_UNDEFINED; + s->current_mapping->info.file.offset = 0; + } + s->current_mapping->path=buffer; + s->current_mapping->read_only = + (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0; } else { g_free(buffer); } @@ -790,25 +790,25 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) /* fill with zeroes up to the end of the cluster */ while(s->directory.next%(0x10*s->sectors_per_cluster)) { - direntry_t* direntry=array_get_next(&(s->directory)); - memset(direntry,0,sizeof(direntry_t)); + direntry_t* direntry=array_get_next(&(s->directory)); + memset(direntry,0,sizeof(direntry_t)); } /* TODO: if there are more entries, bootsector has to be adjusted! 
*/ #define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster) if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) { - /* root directory */ - int cur = s->directory.next; - array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1); - s->directory.next = ROOT_ENTRIES; - memset(array_get(&(s->directory), cur), 0, - (ROOT_ENTRIES - cur) * sizeof(direntry_t)); + /* root directory */ + int cur = s->directory.next; + array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1); + s->directory.next = ROOT_ENTRIES; + memset(array_get(&(s->directory), cur), 0, + (ROOT_ENTRIES - cur) * sizeof(direntry_t)); } /* reget the mapping, since s->mapping was possibly realloc()ed */ mapping = array_get(&(s->mapping), mapping_index); first_cluster += (s->directory.next - mapping->info.dir.first_dir_index) - * 0x20 / s->cluster_size; + * 0x20 / s->cluster_size; mapping->end = first_cluster; direntry = array_get(&(s->directory), mapping->dir_index); @@ -856,8 +856,8 @@ static int init_directories(BDRVVVFATState* s, /* add volume label */ { - direntry_t* entry=array_get_next(&(s->directory)); - entry->attributes=0x28; /* archive | volume label */ + direntry_t* entry=array_get_next(&(s->directory)); + entry->attributes=0x28; /* archive | volume label */ memcpy(entry->name, s->volume_label, sizeof(entry->name)); } @@ -875,61 +875,61 @@ static int init_directories(BDRVVVFATState* s, mapping->path = g_strdup(dirname); i = strlen(mapping->path); if (i > 0 && mapping->path[i - 1] == '/') - mapping->path[i - 1] = '\0'; + mapping->path[i - 1] = '\0'; mapping->mode = MODE_DIRECTORY; mapping->read_only = 0; s->path = mapping->path; for (i = 0, cluster = 0; i < s->mapping.next; i++) { - /* MS-DOS expects the FAT to be 0 for the root directory - * (except for the media byte). */ - /* LATER TODO: still true for FAT32? */ - int fix_fat = (i != 0); - mapping = array_get(&(s->mapping), i); + /* MS-DOS expects the FAT to be 0 for the root directory + * (except for the media byte). */ + /* LATER TODO: still true for FAT32? 
*/ + int fix_fat = (i != 0); + mapping = array_get(&(s->mapping), i); if (mapping->mode & MODE_DIRECTORY) { - mapping->begin = cluster; - if(read_directory(s, i)) { + mapping->begin = cluster; + if(read_directory(s, i)) { error_setg(errp, "Could not read directory %s", mapping->path); - return -1; - } - mapping = array_get(&(s->mapping), i); - } else { - assert(mapping->mode == MODE_UNDEFINED); - mapping->mode=MODE_NORMAL; - mapping->begin = cluster; - if (mapping->end > 0) { - direntry_t* direntry = array_get(&(s->directory), - mapping->dir_index); + return -1; + } + mapping = array_get(&(s->mapping), i); + } else { + assert(mapping->mode == MODE_UNDEFINED); + mapping->mode=MODE_NORMAL; + mapping->begin = cluster; + if (mapping->end > 0) { + direntry_t* direntry = array_get(&(s->directory), + mapping->dir_index); - mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size; - set_begin_of_direntry(direntry, mapping->begin); - } else { - mapping->end = cluster + 1; - fix_fat = 0; - } - } + mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size; + set_begin_of_direntry(direntry, mapping->begin); + } else { + mapping->end = cluster + 1; + fix_fat = 0; + } + } - assert(mapping->begin < mapping->end); + assert(mapping->begin < mapping->end); - /* next free cluster */ - cluster = mapping->end; + /* next free cluster */ + cluster = mapping->end; - if(cluster > s->cluster_count) { + if(cluster > s->cluster_count) { error_setg(errp, "Directory does not fit in FAT%d (capacity %.2f MB)", s->fat_type, s->sector_count / 2000.0); return -1; - } + } - /* fix fat for entry */ - if (fix_fat) { - int j; - for(j = mapping->begin; j < mapping->end - 1; j++) - fat_set(s, j, j+1); - fat_set(s, mapping->end - 1, s->max_fat_value); - } + /* fix fat for entry */ + if (fix_fat) { + int j; + for(j = mapping->begin; j < mapping->end - 1; j++) + fat_set(s, j, j+1); + fat_set(s, mapping->end - 1, s->max_fat_value); + } } mapping = array_get(&(s->mapping), 0); @@ -1135,7 +1135,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, switch (s->fat_type) { case 32: - fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. " + fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. " "You are welcome to do so!\n"); break; case 16: @@ -1229,11 +1229,11 @@ static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp) static inline void vvfat_close_current_file(BDRVVVFATState *s) { if(s->current_mapping) { - s->current_mapping = NULL; - if (s->current_fd) { - qemu_close(s->current_fd); - s->current_fd = 0; - } + s->current_mapping = NULL; + if (s->current_fd) { + qemu_close(s->current_fd); + s->current_fd = 0; + } } s->current_cluster = -1; } @@ -1245,26 +1245,26 @@ static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num { while(1) { int index3; - mapping_t* mapping; - index3=(index1+index2)/2; - mapping=array_get(&(s->mapping),index3); - assert(mapping->begin < mapping->end); - if(mapping->begin>=cluster_num) { - assert(index2!=index3 || index2==0); - if(index2==index3) - return index1; - index2=index3; - } else { - if(index1==index3) - return mapping->end<=cluster_num ? 
index2 : index1; - index1=index3; - } - assert(index1<=index2); - DLOG(mapping=array_get(&(s->mapping),index1); - assert(mapping->begin<=cluster_num); - assert(index2 >= s->mapping.next || - ((mapping = array_get(&(s->mapping),index2)) && - mapping->end>cluster_num))); + mapping_t* mapping; + index3=(index1+index2)/2; + mapping=array_get(&(s->mapping),index3); + assert(mapping->begin < mapping->end); + if(mapping->begin>=cluster_num) { + assert(index2!=index3 || index2==0); + if(index2==index3) + return index1; + index2=index3; + } else { + if(index1==index3) + return mapping->end<=cluster_num ? index2 : index1; + index1=index3; + } + assert(index1<=index2); + DLOG(mapping=array_get(&(s->mapping),index1); + assert(mapping->begin<=cluster_num); + assert(index2 >= s->mapping.next || + ((mapping = array_get(&(s->mapping),index2)) && + mapping->end>cluster_num))); } } @@ -1284,16 +1284,16 @@ static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_ static int open_file(BDRVVVFATState* s,mapping_t* mapping) { if(!mapping) - return -1; + return -1; if(!s->current_mapping || - strcmp(s->current_mapping->path,mapping->path)) { - /* open file */ - int fd = qemu_open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE); - if(fd<0) - return -1; - vvfat_close_current_file(s); - s->current_fd = fd; - s->current_mapping = mapping; + strcmp(s->current_mapping->path,mapping->path)) { + /* open file */ + int fd = qemu_open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE); + if(fd<0) + return -1; + vvfat_close_current_file(s); + s->current_fd = fd; + s->current_mapping = mapping; } return 0; } @@ -1301,47 +1301,47 @@ static int open_file(BDRVVVFATState* s,mapping_t* mapping) static inline int read_cluster(BDRVVVFATState *s,int cluster_num) { if(s->current_cluster != cluster_num) { - int result=0; - off_t offset; - assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY)); - if(!s->current_mapping - || s->current_mapping->begin>cluster_num - || s->current_mapping->end<=cluster_num) { - /* binary search of mappings for file */ - mapping_t* mapping=find_mapping_for_cluster(s,cluster_num); + int result=0; + off_t offset; + assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY)); + if(!s->current_mapping + || s->current_mapping->begin>cluster_num + || s->current_mapping->end<=cluster_num) { + /* binary search of mappings for file */ + mapping_t* mapping=find_mapping_for_cluster(s,cluster_num); - assert(!mapping || (cluster_num>=mapping->begin && cluster_numend)); + assert(!mapping || (cluster_num>=mapping->begin && cluster_numend)); - if (mapping && mapping->mode & MODE_DIRECTORY) { - vvfat_close_current_file(s); - s->current_mapping = mapping; + if (mapping && mapping->mode & MODE_DIRECTORY) { + vvfat_close_current_file(s); + s->current_mapping = mapping; read_cluster_directory: - offset = s->cluster_size*(cluster_num-s->current_mapping->begin); - s->cluster = (unsigned char*)s->directory.pointer+offset - + 0x20*s->current_mapping->info.dir.first_dir_index; - assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0); - assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size); - s->current_cluster = cluster_num; - return 0; - } + offset = s->cluster_size*(cluster_num-s->current_mapping->begin); + s->cluster = (unsigned char*)s->directory.pointer+offset + + 0x20*s->current_mapping->info.dir.first_dir_index; + assert(((s->cluster-(unsigned 
char*)s->directory.pointer)%s->cluster_size)==0); + assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size); + s->current_cluster = cluster_num; + return 0; + } - if(open_file(s,mapping)) - return -2; - } else if (s->current_mapping->mode & MODE_DIRECTORY) - goto read_cluster_directory; + if(open_file(s,mapping)) + return -2; + } else if (s->current_mapping->mode & MODE_DIRECTORY) + goto read_cluster_directory; - assert(s->current_fd); + assert(s->current_fd); - offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset; - if(lseek(s->current_fd, offset, SEEK_SET)!=offset) - return -3; - s->cluster=s->cluster_buffer; - result=read(s->current_fd,s->cluster,s->cluster_size); - if(result<0) { - s->current_cluster = -1; - return -1; - } - s->current_cluster = cluster_num; + offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset; + if(lseek(s->current_fd, offset, SEEK_SET)!=offset) + return -3; + s->cluster=s->cluster_buffer; + result=read(s->current_fd,s->cluster,s->cluster_size); + if(result<0) { + s->current_cluster = -1; + return -1; + } + s->current_cluster = cluster_num; } return 0; } @@ -1354,28 +1354,28 @@ static void print_direntry(const direntry_t* direntry) fprintf(stderr, "direntry %p: ", direntry); if(!direntry) - return; + return; if(is_long_name(direntry)) { - unsigned char* c=(unsigned char*)direntry; - int i; - for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2) + unsigned char* c=(unsigned char*)direntry; + int i; + for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2) #define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;} - ADD_CHAR(c[i]); - for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2) - ADD_CHAR(c[i]); - for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2) - ADD_CHAR(c[i]); - buffer[j] = 0; - fprintf(stderr, "%s\n", buffer); + ADD_CHAR(c[i]); + for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + buffer[j] = 0; + fprintf(stderr, "%s\n", buffer); } else { - int i; - for(i=0;i<11;i++) - ADD_CHAR(direntry->name[i]); - buffer[j] = 0; - fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", - buffer, - direntry->attributes, - begin_of_direntry(direntry),le32_to_cpu(direntry->size)); + int i; + for(i=0;i<11;i++) + ADD_CHAR(direntry->name[i]); + buffer[j] = 0; + fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", + buffer, + direntry->attributes, + begin_of_direntry(direntry),le32_to_cpu(direntry->size)); } } @@ -1387,9 +1387,9 @@ static void print_mapping(const mapping_t* mapping) mapping->first_mapping_index, mapping->path, mapping->mode); if (mapping->mode & MODE_DIRECTORY) - fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); + fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); else - fprintf(stderr, "offset = %d\n", mapping->info.file.offset); + fprintf(stderr, "offset = %d\n", mapping->info.file.offset); } #endif @@ -1400,10 +1400,10 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, int i; for(i=0;i= bs->total_sectors) - return -1; - if (s->qcow) { - int n; + if (sector_num >= bs->total_sectors) + return -1; + if (s->qcow) { + int n; int ret; ret = bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors - i, &n); @@ -1421,25 +1421,25 @@ static int vvfat_read(BlockDriverState 
*bs, int64_t sector_num, continue; } DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); - } - if(sector_numfaked_sectors) { - if(sector_numfirst_sectors_number) - memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200); - else if(sector_num-s->first_sectors_numbersectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200); - else if(sector_num-s->first_sectors_number-s->sectors_per_fatsectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); - } else { - uint32_t sector=sector_num-s->faked_sectors, - sector_offset_in_cluster=(sector%s->sectors_per_cluster), - cluster_num=sector/s->sectors_per_cluster; - if(cluster_num > s->cluster_count || read_cluster(s, cluster_num) != 0) { - /* LATER TODO: strict: return -1; */ - memset(buf+i*0x200,0,0x200); - continue; - } - memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200); - } + } + if(sector_numfaked_sectors) { + if(sector_numfirst_sectors_number) + memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200); + else if(sector_num-s->first_sectors_numbersectors_per_fat) + memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200); + else if(sector_num-s->first_sectors_number-s->sectors_per_fatsectors_per_fat) + memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); + } else { + uint32_t sector=sector_num-s->faked_sectors, + sector_offset_in_cluster=(sector%s->sectors_per_cluster), + cluster_num=sector/s->sectors_per_cluster; + if(cluster_num > s->cluster_count || read_cluster(s, cluster_num) != 0) { + /* LATER TODO: strict: return -1; */ + memset(buf+i*0x200,0,0x200); + continue; + } + memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200); + } } return 0; } @@ -1497,14 +1497,14 @@ vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, typedef struct commit_t { char* path; union { - struct { uint32_t cluster; } rename; - struct { int dir_index; uint32_t modified_offset; } writeout; - struct { uint32_t first_cluster; } new_file; - struct { uint32_t cluster; } mkdir; + struct { uint32_t cluster; } rename; + struct { int dir_index; uint32_t modified_offset; } writeout; + struct { uint32_t first_cluster; } new_file; + struct { uint32_t cluster; } mkdir; } param; /* DELETEs and RMDIRs are handled differently: see handle_deletes() */ enum { - ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR + ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR } action; } commit_t; @@ -1513,19 +1513,19 @@ static void clear_commits(BDRVVVFATState* s) int i; DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next)); for (i = 0; i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - assert(commit->path || commit->action == ACTION_WRITEOUT); - if (commit->action != ACTION_WRITEOUT) { - assert(commit->path); + commit_t* commit = array_get(&(s->commits), i); + assert(commit->path || commit->action == ACTION_WRITEOUT); + if (commit->action != ACTION_WRITEOUT) { + assert(commit->path); g_free(commit->path); - } else - assert(commit->path == NULL); + } else + assert(commit->path == NULL); } s->commits.next = 0; } static void schedule_rename(BDRVVVFATState* s, - uint32_t cluster, char* new_path) + uint32_t cluster, char* new_path) { commit_t* commit = array_get_next(&(s->commits)); commit->path = new_path; @@ -1534,7 +1534,7 @@ static void 
schedule_rename(BDRVVVFATState* s, } static void schedule_writeout(BDRVVVFATState* s, - int dir_index, uint32_t modified_offset) + int dir_index, uint32_t modified_offset) { commit_t* commit = array_get_next(&(s->commits)); commit->path = NULL; @@ -1544,7 +1544,7 @@ static void schedule_writeout(BDRVVVFATState* s, } static void schedule_new_file(BDRVVVFATState* s, - char* path, uint32_t first_cluster) + char* path, uint32_t first_cluster) { commit_t* commit = array_get_next(&(s->commits)); commit->path = path; @@ -1579,72 +1579,72 @@ static void lfn_init(long_file_name* lfn) /* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ static int parse_long_name(long_file_name* lfn, - const direntry_t* direntry) + const direntry_t* direntry) { int i, j, offset; const unsigned char* pointer = (const unsigned char*)direntry; if (!is_long_name(direntry)) - return 1; + return 1; if (pointer[0] & 0x40) { - lfn->sequence_number = pointer[0] & 0x3f; - lfn->checksum = pointer[13]; - lfn->name[0] = 0; - lfn->name[lfn->sequence_number * 13] = 0; + lfn->sequence_number = pointer[0] & 0x3f; + lfn->checksum = pointer[13]; + lfn->name[0] = 0; + lfn->name[lfn->sequence_number * 13] = 0; } else if ((pointer[0] & 0x3f) != --lfn->sequence_number) - return -1; + return -1; else if (pointer[13] != lfn->checksum) - return -2; + return -2; else if (pointer[12] || pointer[26] || pointer[27]) - return -3; + return -3; offset = 13 * (lfn->sequence_number - 1); for (i = 0, j = 1; i < 13; i++, j+=2) { - if (j == 11) - j = 14; - else if (j == 26) - j = 28; + if (j == 11) + j = 14; + else if (j == 26) + j = 28; - if (pointer[j+1] == 0) - lfn->name[offset + i] = pointer[j]; - else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0) - return -4; - else - lfn->name[offset + i] = 0; + if (pointer[j+1] == 0) + lfn->name[offset + i] = pointer[j]; + else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0) + return -4; + else + lfn->name[offset + i] = 0; } if (pointer[0] & 0x40) - lfn->len = offset + strlen((char*)lfn->name + offset); + lfn->len = offset + strlen((char*)lfn->name + offset); return 0; } /* returns 0 if successful, >0 if no short_name, and <0 on error */ static int parse_short_name(BDRVVVFATState* s, - long_file_name* lfn, direntry_t* direntry) + long_file_name* lfn, direntry_t* direntry) { int i, j; if (!is_short_name(direntry)) - return 1; + return 1; for (j = 7; j >= 0 && direntry->name[j] == ' '; j--); for (i = 0; i <= j; i++) { - if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f) - return -1; - else if (s->downcase_short_names) - lfn->name[i] = qemu_tolower(direntry->name[i]); - else - lfn->name[i] = direntry->name[i]; + if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f) + return -1; + else if (s->downcase_short_names) + lfn->name[i] = qemu_tolower(direntry->name[i]); + else + lfn->name[i] = direntry->name[i]; } for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) { } if (j >= 0) { - lfn->name[i++] = '.'; - lfn->name[i + j + 1] = '\0'; - for (;j >= 0; j--) { + lfn->name[i++] = '.'; + lfn->name[i + j + 1] = '\0'; + for (;j >= 0; j--) { uint8_t c = direntry->name[8 + j]; if (c <= ' ' || c > 0x7f) { return -2; @@ -1653,9 +1653,9 @@ static int parse_short_name(BDRVVVFATState* s, } else { lfn->name[i + j] = c; } - } + } } else - lfn->name[i + j + 1] = '\0'; + lfn->name[i + j + 1] = '\0'; lfn->len = strlen((char*)lfn->name); @@ -1663,13 +1663,13 @@ static int parse_short_name(BDRVVVFATState* s, } static inline uint32_t modified_fat_get(BDRVVVFATState* s, - unsigned int cluster) + 
unsigned int cluster) { if (cluster < s->last_cluster_of_root_directory) { - if (cluster + 1 == s->last_cluster_of_root_directory) - return s->max_fat_value; - else - return cluster + 1; + if (cluster + 1 == s->last_cluster_of_root_directory) + return s->max_fat_value; + else + return cluster + 1; } if (s->fat_type==32) { @@ -1713,9 +1713,9 @@ static const char* get_basename(const char* path) { char* basename = strrchr(path, '/'); if (basename == NULL) - return path; + return path; else - return basename + 1; /* strip '/' */ + return basename + 1; /* strip '/' */ } /* @@ -1740,7 +1740,7 @@ typedef enum { * assumed to be *not* deleted (and *only* those). */ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, - direntry_t* direntry, const char* path) + direntry_t* direntry, const char* path) { /* * This is a little bit tricky: @@ -1773,85 +1773,85 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, /* the root directory */ if (cluster_num == 0) - return 0; + return 0; /* write support */ if (s->qcow) { - basename2 = get_basename(path); + basename2 = get_basename(path); - mapping = find_mapping_for_cluster(s, cluster_num); + mapping = find_mapping_for_cluster(s, cluster_num); - if (mapping) { - const char* basename; + if (mapping) { + const char* basename; - assert(mapping->mode & MODE_DELETED); - mapping->mode &= ~MODE_DELETED; + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; - basename = get_basename(mapping->path); + basename = get_basename(mapping->path); - assert(mapping->mode & MODE_NORMAL); + assert(mapping->mode & MODE_NORMAL); - /* rename */ - if (strcmp(basename, basename2)) - schedule_rename(s, cluster_num, g_strdup(path)); - } else if (is_file(direntry)) - /* new file */ - schedule_new_file(s, g_strdup(path), cluster_num); - else { + /* rename */ + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, g_strdup(path)); + } else if (is_file(direntry)) + /* new file */ + schedule_new_file(s, g_strdup(path), cluster_num); + else { abort(); - return 0; - } + return 0; + } } while(1) { - if (s->qcow) { - if (!copy_it && cluster_was_modified(s, cluster_num)) { - if (mapping == NULL || - mapping->begin > cluster_num || - mapping->end <= cluster_num) - mapping = find_mapping_for_cluster(s, cluster_num); + if (s->qcow) { + if (!copy_it && cluster_was_modified(s, cluster_num)) { + if (mapping == NULL || + mapping->begin > cluster_num || + mapping->end <= cluster_num) + mapping = find_mapping_for_cluster(s, cluster_num); - if (mapping && - (mapping->mode & MODE_DIRECTORY) == 0) { + if (mapping && + (mapping->mode & MODE_DIRECTORY) == 0) { - /* was modified in qcow */ - if (offset != mapping->info.file.offset + s->cluster_size - * (cluster_num - mapping->begin)) { - /* offset of this cluster in file chain has changed */ + /* was modified in qcow */ + if (offset != mapping->info.file.offset + s->cluster_size + * (cluster_num - mapping->begin)) { + /* offset of this cluster in file chain has changed */ abort(); - copy_it = 1; - } else if (offset == 0) { - const char* basename = get_basename(mapping->path); + copy_it = 1; + } else if (offset == 0) { + const char* basename = get_basename(mapping->path); - if (strcmp(basename, basename2)) - copy_it = 1; - first_mapping_index = array_index(&(s->mapping), mapping); - } + if (strcmp(basename, basename2)) + copy_it = 1; + first_mapping_index = array_index(&(s->mapping), mapping); + } - if (mapping->first_mapping_index != first_mapping_index - && mapping->info.file.offset > 0) { + if 
(mapping->first_mapping_index != first_mapping_index + && mapping->info.file.offset > 0) { abort(); - copy_it = 1; - } + copy_it = 1; + } - /* need to write out? */ - if (!was_modified && is_file(direntry)) { - was_modified = 1; - schedule_writeout(s, mapping->dir_index, offset); - } - } - } + /* need to write out? */ + if (!was_modified && is_file(direntry)) { + was_modified = 1; + schedule_writeout(s, mapping->dir_index, offset); + } + } + } - if (copy_it) { - int i, dummy; - /* - * This is horribly inefficient, but that is okay, since - * it is rarely executed, if at all. - */ - int64_t offset = cluster2sector(s, cluster_num); + if (copy_it) { + int i, dummy; + /* + * This is horribly inefficient, but that is okay, since + * it is rarely executed, if at all. + */ + int64_t offset = cluster2sector(s, cluster_num); - vvfat_close_current_file(s); + vvfat_close_current_file(s); for (i = 0; i < s->sectors_per_cluster; i++) { int res; @@ -1870,22 +1870,22 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, } } } - } - } + } + } - ret++; - if (s->used_clusters[cluster_num] & USED_ANY) - return 0; - s->used_clusters[cluster_num] = USED_FILE; + ret++; + if (s->used_clusters[cluster_num] & USED_ANY) + return 0; + s->used_clusters[cluster_num] = USED_FILE; - cluster_num = modified_fat_get(s, cluster_num); + cluster_num = modified_fat_get(s, cluster_num); - if (fat_eof(s, cluster_num)) - return ret; - else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16) - return -1; + if (fat_eof(s, cluster_num)) + return ret; + else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16) + return -1; - offset += s->cluster_size; + offset += s->cluster_size; } } @@ -1895,7 +1895,7 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, * used by the directory, its subdirectories and their files. 
*/ static int check_directory_consistency(BDRVVVFATState *s, - int cluster_num, const char* path) + int cluster_num, const char* path) { int ret = 0; unsigned char* cluster = g_malloc(s->cluster_size); @@ -1912,104 +1912,104 @@ static int check_directory_consistency(BDRVVVFATState *s, path2[path_len + 1] = '\0'; if (mapping) { - const char* basename = get_basename(mapping->path); - const char* basename2 = get_basename(path); + const char* basename = get_basename(mapping->path); + const char* basename2 = get_basename(path); - assert(mapping->mode & MODE_DIRECTORY); + assert(mapping->mode & MODE_DIRECTORY); - assert(mapping->mode & MODE_DELETED); - mapping->mode &= ~MODE_DELETED; + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; - if (strcmp(basename, basename2)) - schedule_rename(s, cluster_num, g_strdup(path)); + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, g_strdup(path)); } else - /* new directory */ - schedule_mkdir(s, cluster_num, g_strdup(path)); + /* new directory */ + schedule_mkdir(s, cluster_num, g_strdup(path)); lfn_init(&lfn); do { - int i; - int subret = 0; + int i; + int subret = 0; - ret++; + ret++; - if (s->used_clusters[cluster_num] & USED_ANY) { - fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num); + if (s->used_clusters[cluster_num] & USED_ANY) { + fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num); goto fail; - } - s->used_clusters[cluster_num] = USED_DIRECTORY; + } + s->used_clusters[cluster_num] = USED_DIRECTORY; DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num))); - subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster, - s->sectors_per_cluster); - if (subret) { - fprintf(stderr, "Error fetching direntries\n"); - fail: + subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster, + s->sectors_per_cluster); + if (subret) { + fprintf(stderr, "Error fetching direntries\n"); + fail: g_free(cluster); - return 0; - } + return 0; + } - for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) { - int cluster_count = 0; + for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) { + int cluster_count = 0; DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i)); - if (is_volume_label(direntries + i) || is_dot(direntries + i) || - is_free(direntries + i)) - continue; + if (is_volume_label(direntries + i) || is_dot(direntries + i) || + is_free(direntries + i)) + continue; - subret = parse_long_name(&lfn, direntries + i); - if (subret < 0) { - fprintf(stderr, "Error in long name\n"); - goto fail; - } - if (subret == 0 || is_free(direntries + i)) - continue; + subret = parse_long_name(&lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in long name\n"); + goto fail; + } + if (subret == 0 || is_free(direntries + i)) + continue; - if (fat_chksum(direntries+i) != lfn.checksum) { - subret = parse_short_name(s, &lfn, direntries + i); - if (subret < 0) { - fprintf(stderr, "Error in short name (%d)\n", subret); - goto fail; - } - if (subret > 0 || !strcmp((char*)lfn.name, ".") - || !strcmp((char*)lfn.name, "..")) - continue; - } - lfn.checksum = 0x100; /* cannot use long name twice */ + if (fat_chksum(direntries+i) != lfn.checksum) { + subret = parse_short_name(s, &lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in short name (%d)\n", subret); + goto fail; + } + if (subret > 0 || !strcmp((char*)lfn.name, ".") + || !strcmp((char*)lfn.name, "..")) + continue; + } + lfn.checksum = 
0x100; /* cannot use long name twice */ - if (path_len + 1 + lfn.len >= PATH_MAX) { - fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); - goto fail; - } + if (path_len + 1 + lfn.len >= PATH_MAX) { + fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); + goto fail; + } pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1, (char*)lfn.name); - if (is_directory(direntries + i)) { - if (begin_of_direntry(direntries + i) == 0) { - DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i)); - goto fail; - } - cluster_count = check_directory_consistency(s, - begin_of_direntry(direntries + i), path2); - if (cluster_count == 0) { - DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i)); - goto fail; - } - } else if (is_file(direntries + i)) { - /* check file size with FAT */ - cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2); - if (cluster_count != + if (is_directory(direntries + i)) { + if (begin_of_direntry(direntries + i) == 0) { + DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i)); + goto fail; + } + cluster_count = check_directory_consistency(s, + begin_of_direntry(direntries + i), path2); + if (cluster_count == 0) { + DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i)); + goto fail; + } + } else if (is_file(direntries + i)) { + /* check file size with FAT */ + cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2); + if (cluster_count != DIV_ROUND_UP(le32_to_cpu(direntries[i].size), s->cluster_size)) { - DLOG(fprintf(stderr, "Cluster count mismatch\n")); - goto fail; - } - } else + DLOG(fprintf(stderr, "Cluster count mismatch\n")); + goto fail; + } + } else abort(); /* cluster_count = 0; */ - ret += cluster_count; - } + ret += cluster_count; + } - cluster_num = modified_fat_get(s, cluster_num); + cluster_num = modified_fat_get(s, cluster_num); } while(!fat_eof(s, cluster_num)); g_free(cluster); @@ -2037,81 +2037,81 @@ DLOG(checkpoint()); * - if all is fine, return number of used clusters */ if (s->fat2 == NULL) { - int size = 0x200 * s->sectors_per_fat; - s->fat2 = g_malloc(size); - memcpy(s->fat2, s->fat.pointer, size); + int size = 0x200 * s->sectors_per_fat; + s->fat2 = g_malloc(size); + memcpy(s->fat2, s->fat.pointer, size); } check = vvfat_read(s->bs, - s->first_sectors_number, s->fat2, s->sectors_per_fat); + s->first_sectors_number, s->fat2, s->sectors_per_fat); if (check) { - fprintf(stderr, "Could not copy fat\n"); - return 0; + fprintf(stderr, "Could not copy fat\n"); + return 0; } assert (s->used_clusters); for (i = 0; i < sector2cluster(s, s->sector_count); i++) - s->used_clusters[i] &= ~USED_ANY; + s->used_clusters[i] &= ~USED_ANY; clear_commits(s); /* mark every mapped file/directory as deleted. * (check_directory_consistency() will unmark those still present). 
*/ if (s->qcow) - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->first_mapping_index < 0) - mapping->mode |= MODE_DELETED; - } + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->first_mapping_index < 0) + mapping->mode |= MODE_DELETED; + } used_clusters_count = check_directory_consistency(s, 0, s->path); if (used_clusters_count <= 0) { - DLOG(fprintf(stderr, "problem in directory\n")); - return 0; + DLOG(fprintf(stderr, "problem in directory\n")); + return 0; } check = s->last_cluster_of_root_directory; for (i = check; i < sector2cluster(s, s->sector_count); i++) { - if (modified_fat_get(s, i)) { - if(!s->used_clusters[i]) { - DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i)); - return 0; - } - check++; - } + if (modified_fat_get(s, i)) { + if(!s->used_clusters[i]) { + DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i)); + return 0; + } + check++; + } - if (s->used_clusters[i] == USED_ALLOCATED) { - /* allocated, but not used... */ - DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i)); - return 0; - } + if (s->used_clusters[i] == USED_ALLOCATED) { + /* allocated, but not used... */ + DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i)); + return 0; + } } if (check != used_clusters_count) - return 0; + return 0; return used_clusters_count; } static inline void adjust_mapping_indices(BDRVVVFATState* s, - int offset, int adjust) + int offset, int adjust) { int i; for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); + mapping_t* mapping = array_get(&(s->mapping), i); #define ADJUST_MAPPING_INDEX(name) \ - if (mapping->name >= offset) \ - mapping->name += adjust + if (mapping->name >= offset) \ + mapping->name += adjust - ADJUST_MAPPING_INDEX(first_mapping_index); - if (mapping->mode & MODE_DIRECTORY) - ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index); + ADJUST_MAPPING_INDEX(first_mapping_index); + if (mapping->mode & MODE_DIRECTORY) + ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index); } } /* insert or update mapping */ static mapping_t* insert_mapping(BDRVVVFATState* s, - uint32_t begin, uint32_t end) + uint32_t begin, uint32_t end) { /* * - find mapping where mapping->begin >= begin, @@ -2125,15 +2125,15 @@ static mapping_t* insert_mapping(BDRVVVFATState* s, mapping_t* first_mapping = array_get(&(s->mapping), 0); if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index)) - && mapping->begin < begin) { - mapping->end = begin; - index++; - mapping = array_get(&(s->mapping), index); + && mapping->begin < begin) { + mapping->end = begin; + index++; + mapping = array_get(&(s->mapping), index); } if (index >= s->mapping.next || mapping->begin > begin) { - mapping = array_insert(&(s->mapping), index, 1); - mapping->path = NULL; - adjust_mapping_indices(s, index, +1); + mapping = array_insert(&(s->mapping), index, 1); + mapping->path = NULL; + adjust_mapping_indices(s, index, +1); } mapping->begin = begin; @@ -2145,8 +2145,8 @@ assert(index + 1 >= s->mapping.next || next_mapping->begin >= end))); if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) - s->current_mapping = array_get(&(s->mapping), - s->current_mapping - first_mapping); + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); return mapping; } @@ -2168,8 +2168,8 @@ static int remove_mapping(BDRVVVFATState* s, int mapping_index) 
adjust_mapping_indices(s, mapping_index, -1); if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) - s->current_mapping = array_get(&(s->mapping), - s->current_mapping - first_mapping); + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); return 0; } @@ -2178,17 +2178,17 @@ static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust) { int i; for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->dir_index >= offset) - mapping->dir_index += adjust; - if ((mapping->mode & MODE_DIRECTORY) && - mapping->info.dir.first_dir_index >= offset) - mapping->info.dir.first_dir_index += adjust; + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->dir_index >= offset) + mapping->dir_index += adjust; + if ((mapping->mode & MODE_DIRECTORY) && + mapping->info.dir.first_dir_index >= offset) + mapping->info.dir.first_dir_index += adjust; } } static direntry_t* insert_direntries(BDRVVVFATState* s, - int dir_index, int count) + int dir_index, int count) { /* * make room in s->directory, @@ -2196,7 +2196,7 @@ static direntry_t* insert_direntries(BDRVVVFATState* s, */ direntry_t* result = array_insert(&(s->directory), dir_index, count); if (result == NULL) - return NULL; + return NULL; adjust_dirindices(s, dir_index, count); return result; } @@ -2205,7 +2205,7 @@ static int remove_direntries(BDRVVVFATState* s, int dir_index, int count) { int ret = array_remove_slice(&(s->directory), dir_index, count); if (ret) - return ret; + return ret; adjust_dirindices(s, dir_index, -count); return 0; } @@ -2217,7 +2217,7 @@ static int remove_direntries(BDRVVVFATState* s, int dir_index, int count) * adjusted) */ static int commit_mappings(BDRVVVFATState* s, - uint32_t first_cluster, int dir_index) + uint32_t first_cluster, int dir_index) { mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); direntry_t* direntry = array_get(&(s->directory), dir_index); @@ -2230,71 +2230,71 @@ static int commit_mappings(BDRVVVFATState* s, mapping->first_mapping_index = -1; mapping->dir_index = dir_index; mapping->mode = (dir_index <= 0 || is_directory(direntry)) ? - MODE_DIRECTORY : MODE_NORMAL; + MODE_DIRECTORY : MODE_NORMAL; while (!fat_eof(s, cluster)) { - uint32_t c, c1; + uint32_t c, c1; - for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1; - c = c1, c1 = modified_fat_get(s, c1)); + for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1; + c = c1, c1 = modified_fat_get(s, c1)); - c++; - if (c > mapping->end) { - int index = array_index(&(s->mapping), mapping); - int i, max_i = s->mapping.next - index; - for (i = 1; i < max_i && mapping[i].begin < c; i++); - while (--i > 0) - remove_mapping(s, index + 1); - } - assert(mapping == array_get(&(s->mapping), s->mapping.next - 1) - || mapping[1].begin >= c); - mapping->end = c; + c++; + if (c > mapping->end) { + int index = array_index(&(s->mapping), mapping); + int i, max_i = s->mapping.next - index; + for (i = 1; i < max_i && mapping[i].begin < c; i++); + while (--i > 0) + remove_mapping(s, index + 1); + } + assert(mapping == array_get(&(s->mapping), s->mapping.next - 1) + || mapping[1].begin >= c); + mapping->end = c; - if (!fat_eof(s, c1)) { - int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next); - mapping_t* next_mapping = i >= s->mapping.next ? NULL : - array_get(&(s->mapping), i); + if (!fat_eof(s, c1)) { + int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next); + mapping_t* next_mapping = i >= s->mapping.next ? 
NULL : + array_get(&(s->mapping), i); - if (next_mapping == NULL || next_mapping->begin > c1) { - int i1 = array_index(&(s->mapping), mapping); + if (next_mapping == NULL || next_mapping->begin > c1) { + int i1 = array_index(&(s->mapping), mapping); - next_mapping = insert_mapping(s, c1, c1+1); + next_mapping = insert_mapping(s, c1, c1+1); - if (c1 < c) - i1++; - mapping = array_get(&(s->mapping), i1); - } + if (c1 < c) + i1++; + mapping = array_get(&(s->mapping), i1); + } - next_mapping->dir_index = mapping->dir_index; - next_mapping->first_mapping_index = - mapping->first_mapping_index < 0 ? - array_index(&(s->mapping), mapping) : - mapping->first_mapping_index; - next_mapping->path = mapping->path; - next_mapping->mode = mapping->mode; - next_mapping->read_only = mapping->read_only; - if (mapping->mode & MODE_DIRECTORY) { - next_mapping->info.dir.parent_mapping_index = - mapping->info.dir.parent_mapping_index; - next_mapping->info.dir.first_dir_index = - mapping->info.dir.first_dir_index + - 0x10 * s->sectors_per_cluster * - (mapping->end - mapping->begin); - } else - next_mapping->info.file.offset = mapping->info.file.offset + - mapping->end - mapping->begin; + next_mapping->dir_index = mapping->dir_index; + next_mapping->first_mapping_index = + mapping->first_mapping_index < 0 ? + array_index(&(s->mapping), mapping) : + mapping->first_mapping_index; + next_mapping->path = mapping->path; + next_mapping->mode = mapping->mode; + next_mapping->read_only = mapping->read_only; + if (mapping->mode & MODE_DIRECTORY) { + next_mapping->info.dir.parent_mapping_index = + mapping->info.dir.parent_mapping_index; + next_mapping->info.dir.first_dir_index = + mapping->info.dir.first_dir_index + + 0x10 * s->sectors_per_cluster * + (mapping->end - mapping->begin); + } else + next_mapping->info.file.offset = mapping->info.file.offset + + mapping->end - mapping->begin; - mapping = next_mapping; - } + mapping = next_mapping; + } - cluster = c1; + cluster = c1; } return 0; } static int commit_direntries(BDRVVVFATState* s, - int dir_index, int parent_mapping_index) + int dir_index, int parent_mapping_index) { direntry_t* direntry = array_get(&(s->directory), dir_index); uint32_t first_cluster = dir_index == 0 ? 
0 : begin_of_direntry(direntry); @@ -2319,58 +2319,58 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp mapping->info.dir.parent_mapping_index = parent_mapping_index; if (first_cluster == 0) { - old_cluster_count = new_cluster_count = - s->last_cluster_of_root_directory; + old_cluster_count = new_cluster_count = + s->last_cluster_of_root_directory; } else { - for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c); - c = fat_get(s, c)) - old_cluster_count++; + for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = fat_get(s, c)) + old_cluster_count++; - for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c); - c = modified_fat_get(s, c)) - new_cluster_count++; + for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = modified_fat_get(s, c)) + new_cluster_count++; } if (new_cluster_count > old_cluster_count) { - if (insert_direntries(s, - current_dir_index + factor * old_cluster_count, - factor * (new_cluster_count - old_cluster_count)) == NULL) - return -1; + if (insert_direntries(s, + current_dir_index + factor * old_cluster_count, + factor * (new_cluster_count - old_cluster_count)) == NULL) + return -1; } else if (new_cluster_count < old_cluster_count) - remove_direntries(s, - current_dir_index + factor * new_cluster_count, - factor * (old_cluster_count - new_cluster_count)); + remove_direntries(s, + current_dir_index + factor * new_cluster_count, + factor * (old_cluster_count - new_cluster_count)); for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) { direntry_t *first_direntry; - void* direntry = array_get(&(s->directory), current_dir_index); - int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, - s->sectors_per_cluster); - if (ret) - return ret; + void* direntry = array_get(&(s->directory), current_dir_index); + int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, + s->sectors_per_cluster); + if (ret) + return ret; /* The first directory entry on the filesystem is the volume name */ first_direntry = (direntry_t*) s->directory.pointer; assert(!memcmp(first_direntry->name, s->volume_label, 11)); - current_dir_index += factor; + current_dir_index += factor; } ret = commit_mappings(s, first_cluster, dir_index); if (ret) - return ret; + return ret; /* recurse */ for (i = 0; i < factor * new_cluster_count; i++) { - direntry = array_get(&(s->directory), first_dir_index + i); - if (is_directory(direntry) && !is_dot(direntry)) { - mapping = find_mapping_for_cluster(s, first_cluster); - assert(mapping->mode & MODE_DIRECTORY); - ret = commit_direntries(s, first_dir_index + i, - array_index(&(s->mapping), mapping)); - if (ret) - return ret; - } + direntry = array_get(&(s->directory), first_dir_index + i); + if (is_directory(direntry) && !is_dot(direntry)) { + mapping = find_mapping_for_cluster(s, first_cluster); + assert(mapping->mode & MODE_DIRECTORY); + ret = commit_direntries(s, first_dir_index + i, + array_index(&(s->mapping), mapping)); + if (ret) + return ret; + } } return 0; @@ -2379,7 +2379,7 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp /* commit one file (adjust contents, adjust mapping), return first_mapping_index */ static int commit_one_file(BDRVVVFATState* s, - int dir_index, uint32_t offset) + int dir_index, uint32_t offset) { direntry_t* direntry = array_get(&(s->directory), dir_index); uint32_t c = begin_of_direntry(direntry); @@ -2394,14 +2394,14 @@ static int commit_one_file(BDRVVVFATState* s, assert((offset % s->cluster_size) == 0); for 
(i = s->cluster_size; i < offset; i += s->cluster_size) - c = modified_fat_get(s, c); + c = modified_fat_get(s, c); fd = qemu_open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666); if (fd < 0) { - fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path, - strerror(errno), errno); + fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path, + strerror(errno), errno); g_free(cluster); - return fd; + return fd; } if (offset > 0) { if (lseek(fd, offset, SEEK_SET) != offset) { @@ -2412,18 +2412,18 @@ static int commit_one_file(BDRVVVFATState* s, } while (offset < size) { - uint32_t c1; - int rest_size = (size - offset > s->cluster_size ? - s->cluster_size : size - offset); - int ret; + uint32_t c1; + int rest_size = (size - offset > s->cluster_size ? + s->cluster_size : size - offset); + int ret; - c1 = modified_fat_get(s, c); + c1 = modified_fat_get(s, c); - assert((size - offset == 0 && fat_eof(s, c)) || - (size > offset && c >=2 && !fat_eof(s, c))); + assert((size - offset == 0 && fat_eof(s, c)) || + (size > offset && c >=2 && !fat_eof(s, c))); - ret = vvfat_read(s->bs, cluster2sector(s, c), - (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200); + ret = vvfat_read(s->bs, cluster2sector(s, c), + (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200); if (ret < 0) { qemu_close(fd); @@ -2437,8 +2437,8 @@ static int commit_one_file(BDRVVVFATState* s, return -2; } - offset += rest_size; - c = c1; + offset += rest_size; + c = c1; } if (ftruncate(fd, size)) { @@ -2459,18 +2459,18 @@ static void check1(BDRVVVFATState* s) { int i; for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->mode & MODE_DELETED) { - fprintf(stderr, "deleted\n"); - continue; - } - assert(mapping->dir_index < s->directory.next); - direntry_t* direntry = array_get(&(s->directory), mapping->dir_index); - assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0); - if (mapping->mode & MODE_DIRECTORY) { - assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next); - assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0); - } + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + fprintf(stderr, "deleted\n"); + continue; + } + assert(mapping->dir_index < s->directory.next); + direntry_t* direntry = array_get(&(s->directory), mapping->dir_index); + assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0); + if (mapping->mode & MODE_DIRECTORY) { + assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next); + assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0); + } } } @@ -2481,43 +2481,43 @@ static void check2(BDRVVVFATState* s) int first_mapping = -1; for (i = 0; i < s->directory.next; i++) { - direntry_t* direntry = array_get(&(s->directory), i); + direntry_t* direntry = array_get(&(s->directory), i); - if (is_short_name(direntry) && begin_of_direntry(direntry)) { - mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry)); - assert(mapping); - assert(mapping->dir_index == i || is_dot(direntry)); - assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry)); - } + if (is_short_name(direntry) && begin_of_direntry(direntry)) { + mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry)); + assert(mapping); + 
assert(mapping->dir_index == i || is_dot(direntry)); + assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry)); + } - if ((i % (0x10 * s->sectors_per_cluster)) == 0) { - /* cluster start */ - int j, count = 0; + if ((i % (0x10 * s->sectors_per_cluster)) == 0) { + /* cluster start */ + int j, count = 0; - for (j = 0; j < s->mapping.next; j++) { - mapping_t* mapping = array_get(&(s->mapping), j); - if (mapping->mode & MODE_DELETED) - continue; - if (mapping->mode & MODE_DIRECTORY) { - if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) { - assert(++count == 1); - if (mapping->first_mapping_index == -1) - first_mapping = array_index(&(s->mapping), mapping); - else - assert(first_mapping == mapping->first_mapping_index); - if (mapping->info.dir.parent_mapping_index < 0) - assert(j == 0); - else { - mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index); - assert(parent->mode & MODE_DIRECTORY); - assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index); - } - } - } - } - if (count == 0) - first_mapping = -1; - } + for (j = 0; j < s->mapping.next; j++) { + mapping_t* mapping = array_get(&(s->mapping), j); + if (mapping->mode & MODE_DELETED) + continue; + if (mapping->mode & MODE_DIRECTORY) { + if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) { + assert(++count == 1); + if (mapping->first_mapping_index == -1) + first_mapping = array_index(&(s->mapping), mapping); + else + assert(first_mapping == mapping->first_mapping_index); + if (mapping->info.dir.parent_mapping_index < 0) + assert(j == 0); + else { + mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index); + assert(parent->mode & MODE_DIRECTORY); + assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index); + } + } + } + } + if (count == 0) + first_mapping = -1; + } } } #endif @@ -2529,63 +2529,63 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s) #ifdef DEBUG fprintf(stderr, "handle_renames\n"); for (i = 0; i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); + commit_t* commit = array_get(&(s->commits), i); + fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? 
commit->path : "(null)", commit->param.rename.cluster, commit->action); } #endif for (i = 0; i < s->commits.next;) { - commit_t* commit = array_get(&(s->commits), i); - if (commit->action == ACTION_RENAME) { - mapping_t* mapping = find_mapping_for_cluster(s, - commit->param.rename.cluster); - char* old_path = mapping->path; + commit_t* commit = array_get(&(s->commits), i); + if (commit->action == ACTION_RENAME) { + mapping_t* mapping = find_mapping_for_cluster(s, + commit->param.rename.cluster); + char* old_path = mapping->path; - assert(commit->path); - mapping->path = commit->path; - if (rename(old_path, mapping->path)) - return -2; + assert(commit->path); + mapping->path = commit->path; + if (rename(old_path, mapping->path)) + return -2; - if (mapping->mode & MODE_DIRECTORY) { - int l1 = strlen(mapping->path); - int l2 = strlen(old_path); - int diff = l1 - l2; - direntry_t* direntry = array_get(&(s->directory), - mapping->info.dir.first_dir_index); - uint32_t c = mapping->begin; - int i = 0; + if (mapping->mode & MODE_DIRECTORY) { + int l1 = strlen(mapping->path); + int l2 = strlen(old_path); + int diff = l1 - l2; + direntry_t* direntry = array_get(&(s->directory), + mapping->info.dir.first_dir_index); + uint32_t c = mapping->begin; + int i = 0; - /* recurse */ - while (!fat_eof(s, c)) { - do { - direntry_t* d = direntry + i; + /* recurse */ + while (!fat_eof(s, c)) { + do { + direntry_t* d = direntry + i; - if (is_file(d) || (is_directory(d) && !is_dot(d))) { - mapping_t* m = find_mapping_for_cluster(s, - begin_of_direntry(d)); - int l = strlen(m->path); - char* new_path = g_malloc(l + diff + 1); + if (is_file(d) || (is_directory(d) && !is_dot(d))) { + mapping_t* m = find_mapping_for_cluster(s, + begin_of_direntry(d)); + int l = strlen(m->path); + char* new_path = g_malloc(l + diff + 1); - assert(!strncmp(m->path, mapping->path, l2)); + assert(!strncmp(m->path, mapping->path, l2)); pstrcpy(new_path, l + diff + 1, mapping->path); pstrcpy(new_path + l1, l + diff + 1 - l1, m->path + l2); - schedule_rename(s, m->begin, new_path); - } - i++; - } while((i % (0x10 * s->sectors_per_cluster)) != 0); - c = fat_get(s, c); - } - } + schedule_rename(s, m->begin, new_path); + } + i++; + } while((i % (0x10 * s->sectors_per_cluster)) != 0); + c = fat_get(s, c); + } + } g_free(old_path); - array_remove(&(s->commits), i); - continue; - } else if (commit->action == ACTION_MKDIR) { - mapping_t* mapping; - int j, parent_path_len; + array_remove(&(s->commits), i); + continue; + } else if (commit->action == ACTION_MKDIR) { + mapping_t* mapping; + int j, parent_path_len; #ifdef __MINGW32__ if (mkdir(commit->path)) @@ -2595,37 +2595,37 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s) return -5; #endif - mapping = insert_mapping(s, commit->param.mkdir.cluster, - commit->param.mkdir.cluster + 1); - if (mapping == NULL) - return -6; + mapping = insert_mapping(s, commit->param.mkdir.cluster, + commit->param.mkdir.cluster + 1); + if (mapping == NULL) + return -6; - mapping->mode = MODE_DIRECTORY; - mapping->read_only = 0; - mapping->path = commit->path; - j = s->directory.next; - assert(j); - insert_direntries(s, s->directory.next, - 0x10 * s->sectors_per_cluster); - mapping->info.dir.first_dir_index = j; + mapping->mode = MODE_DIRECTORY; + mapping->read_only = 0; + mapping->path = commit->path; + j = s->directory.next; + assert(j); + insert_direntries(s, s->directory.next, + 0x10 * s->sectors_per_cluster); + mapping->info.dir.first_dir_index = j; - parent_path_len = strlen(commit->path) - - 
strlen(get_basename(commit->path)) - 1; - for (j = 0; j < s->mapping.next; j++) { - mapping_t* m = array_get(&(s->mapping), j); - if (m->first_mapping_index < 0 && m != mapping && - !strncmp(m->path, mapping->path, parent_path_len) && - strlen(m->path) == parent_path_len) - break; - } - assert(j < s->mapping.next); - mapping->info.dir.parent_mapping_index = j; + parent_path_len = strlen(commit->path) + - strlen(get_basename(commit->path)) - 1; + for (j = 0; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->first_mapping_index < 0 && m != mapping && + !strncmp(m->path, mapping->path, parent_path_len) && + strlen(m->path) == parent_path_len) + break; + } + assert(j < s->mapping.next); + mapping->info.dir.parent_mapping_index = j; - array_remove(&(s->commits), i); - continue; - } + array_remove(&(s->commits), i); + continue; + } - i++; + i++; } return 0; } @@ -2640,75 +2640,75 @@ static int handle_commits(BDRVVVFATState* s) vvfat_close_current_file(s); for (i = 0; !fail && i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - switch(commit->action) { - case ACTION_RENAME: case ACTION_MKDIR: + commit_t* commit = array_get(&(s->commits), i); + switch(commit->action) { + case ACTION_RENAME: case ACTION_MKDIR: abort(); - fail = -2; - break; - case ACTION_WRITEOUT: { + fail = -2; + break; + case ACTION_WRITEOUT: { #ifndef NDEBUG /* these variables are only used by assert() below */ - direntry_t* entry = array_get(&(s->directory), - commit->param.writeout.dir_index); - uint32_t begin = begin_of_direntry(entry); - mapping_t* mapping = find_mapping_for_cluster(s, begin); + direntry_t* entry = array_get(&(s->directory), + commit->param.writeout.dir_index); + uint32_t begin = begin_of_direntry(entry); + mapping_t* mapping = find_mapping_for_cluster(s, begin); #endif - assert(mapping); - assert(mapping->begin == begin); - assert(commit->path == NULL); + assert(mapping); + assert(mapping->begin == begin); + assert(commit->path == NULL); - if (commit_one_file(s, commit->param.writeout.dir_index, - commit->param.writeout.modified_offset)) - fail = -3; + if (commit_one_file(s, commit->param.writeout.dir_index, + commit->param.writeout.modified_offset)) + fail = -3; - break; - } - case ACTION_NEW_FILE: { - int begin = commit->param.new_file.first_cluster; - mapping_t* mapping = find_mapping_for_cluster(s, begin); - direntry_t* entry; - int i; + break; + } + case ACTION_NEW_FILE: { + int begin = commit->param.new_file.first_cluster; + mapping_t* mapping = find_mapping_for_cluster(s, begin); + direntry_t* entry; + int i; - /* find direntry */ - for (i = 0; i < s->directory.next; i++) { - entry = array_get(&(s->directory), i); - if (is_file(entry) && begin_of_direntry(entry) == begin) - break; - } + /* find direntry */ + for (i = 0; i < s->directory.next; i++) { + entry = array_get(&(s->directory), i); + if (is_file(entry) && begin_of_direntry(entry) == begin) + break; + } - if (i >= s->directory.next) { - fail = -6; - continue; - } + if (i >= s->directory.next) { + fail = -6; + continue; + } - /* make sure there exists an initial mapping */ - if (mapping && mapping->begin != begin) { - mapping->end = begin; - mapping = NULL; - } - if (mapping == NULL) { - mapping = insert_mapping(s, begin, begin+1); - } - /* most members will be fixed in commit_mappings() */ - assert(commit->path); - mapping->path = commit->path; - mapping->read_only = 0; - mapping->mode = MODE_NORMAL; - mapping->info.file.offset = 0; + /* make sure there exists an initial mapping */ + 
if (mapping && mapping->begin != begin) { + mapping->end = begin; + mapping = NULL; + } + if (mapping == NULL) { + mapping = insert_mapping(s, begin, begin+1); + } + /* most members will be fixed in commit_mappings() */ + assert(commit->path); + mapping->path = commit->path; + mapping->read_only = 0; + mapping->mode = MODE_NORMAL; + mapping->info.file.offset = 0; - if (commit_one_file(s, i, 0)) - fail = -7; + if (commit_one_file(s, i, 0)) + fail = -7; - break; - } - default: + break; + } + default: abort(); - } + } } if (i > 0 && array_remove_slice(&(s->commits), 0, i)) - return -1; + return -1; return fail; } @@ -2719,53 +2719,53 @@ static int handle_deletes(BDRVVVFATState* s) /* delete files corresponding to mappings marked as deleted */ /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */ while (deferred && deleted) { - deferred = 0; - deleted = 0; + deferred = 0; + deleted = 0; - for (i = 1; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->mode & MODE_DELETED) { - direntry_t* entry = array_get(&(s->directory), - mapping->dir_index); + for (i = 1; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + direntry_t* entry = array_get(&(s->directory), + mapping->dir_index); - if (is_free(entry)) { - /* remove file/directory */ - if (mapping->mode & MODE_DIRECTORY) { - int j, next_dir_index = s->directory.next, - first_dir_index = mapping->info.dir.first_dir_index; + if (is_free(entry)) { + /* remove file/directory */ + if (mapping->mode & MODE_DIRECTORY) { + int j, next_dir_index = s->directory.next, + first_dir_index = mapping->info.dir.first_dir_index; - if (rmdir(mapping->path) < 0) { - if (errno == ENOTEMPTY) { - deferred++; - continue; - } else - return -5; - } + if (rmdir(mapping->path) < 0) { + if (errno == ENOTEMPTY) { + deferred++; + continue; + } else + return -5; + } - for (j = 1; j < s->mapping.next; j++) { - mapping_t* m = array_get(&(s->mapping), j); - if (m->mode & MODE_DIRECTORY && - m->info.dir.first_dir_index > - first_dir_index && - m->info.dir.first_dir_index < - next_dir_index) - next_dir_index = - m->info.dir.first_dir_index; - } - remove_direntries(s, first_dir_index, - next_dir_index - first_dir_index); + for (j = 1; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->mode & MODE_DIRECTORY && + m->info.dir.first_dir_index > + first_dir_index && + m->info.dir.first_dir_index < + next_dir_index) + next_dir_index = + m->info.dir.first_dir_index; + } + remove_direntries(s, first_dir_index, + next_dir_index - first_dir_index); - deleted++; - } - } else { - if (unlink(mapping->path)) - return -4; - deleted++; - } - DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry)); - remove_mapping(s, i); - } - } + deleted++; + } + } else { + if (unlink(mapping->path)) + return -4; + deleted++; + } + DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry)); + remove_mapping(s, i); + } + } } return 0; @@ -2785,15 +2785,15 @@ static int do_commit(BDRVVVFATState* s) /* the real meat are the commits. Nothing to do? Move along! 
*/ if (s->commits.next == 0) - return 0; + return 0; vvfat_close_current_file(s); ret = handle_renames_and_mkdirs(s); if (ret) { - fprintf(stderr, "Error handling renames (%d)\n", ret); + fprintf(stderr, "Error handling renames (%d)\n", ret); abort(); - return ret; + return ret; } /* copy FAT (with bdrv_read) */ @@ -2802,23 +2802,23 @@ static int do_commit(BDRVVVFATState* s) /* recurse direntries from root (using bs->bdrv_read) */ ret = commit_direntries(s, 0, -1); if (ret) { - fprintf(stderr, "Fatal: error while committing (%d)\n", ret); + fprintf(stderr, "Fatal: error while committing (%d)\n", ret); abort(); - return ret; + return ret; } ret = handle_commits(s); if (ret) { - fprintf(stderr, "Error handling commits (%d)\n", ret); + fprintf(stderr, "Error handling commits (%d)\n", ret); abort(); - return ret; + return ret; } ret = handle_deletes(s); if (ret) { - fprintf(stderr, "Error deleting\n"); + fprintf(stderr, "Error deleting\n"); abort(); - return ret; + return ret; } if (s->qcow->bs->drv->bdrv_make_empty) { @@ -2836,7 +2836,7 @@ static int try_commit(BDRVVVFATState* s) vvfat_close_current_file(s); DLOG(checkpoint()); if(!is_consistent(s)) - return -1; + return -1; return do_commit(s); } @@ -2862,56 +2862,56 @@ DLOG(checkpoint()); */ if (sector_num < s->first_sectors_number) - return -1; + return -1; for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1);) { - mapping_t* mapping = find_mapping_for_cluster(s, i); - if (mapping) { - if (mapping->read_only) { - fprintf(stderr, "Tried to write to write-protected file %s\n", - mapping->path); - return -1; - } + i <= sector2cluster(s, sector_num + nb_sectors - 1);) { + mapping_t* mapping = find_mapping_for_cluster(s, i); + if (mapping) { + if (mapping->read_only) { + fprintf(stderr, "Tried to write to write-protected file %s\n", + mapping->path); + return -1; + } - if (mapping->mode & MODE_DIRECTORY) { - int begin = cluster2sector(s, i); - int end = begin + s->sectors_per_cluster, k; - int dir_index; - const direntry_t* direntries; - long_file_name lfn; + if (mapping->mode & MODE_DIRECTORY) { + int begin = cluster2sector(s, i); + int end = begin + s->sectors_per_cluster, k; + int dir_index; + const direntry_t* direntries; + long_file_name lfn; - lfn_init(&lfn); + lfn_init(&lfn); - if (begin < sector_num) - begin = sector_num; - if (end > sector_num + nb_sectors) - end = sector_num + nb_sectors; - dir_index = mapping->dir_index + - 0x10 * (begin - mapping->begin * s->sectors_per_cluster); - direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num)); + if (begin < sector_num) + begin = sector_num; + if (end > sector_num + nb_sectors) + end = sector_num + nb_sectors; + dir_index = mapping->dir_index + + 0x10 * (begin - mapping->begin * s->sectors_per_cluster); + direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num)); - for (k = 0; k < (end - begin) * 0x10; k++) { - /* do not allow non-ASCII filenames */ - if (parse_long_name(&lfn, direntries + k) < 0) { - fprintf(stderr, "Warning: non-ASCII filename\n"); - return -1; - } - /* no access to the direntry of a read-only file */ - else if (is_short_name(direntries+k) && - (direntries[k].attributes & 1)) { - if (memcmp(direntries + k, - array_get(&(s->directory), dir_index + k), - sizeof(direntry_t))) { - fprintf(stderr, "Warning: tried to write to write-protected file\n"); - return -1; - } - } - } - } - i = mapping->end; - } else - i++; + for (k = 0; k < (end - begin) * 0x10; k++) { + /* do not allow non-ASCII filenames */ + if 
(parse_long_name(&lfn, direntries + k) < 0) { + fprintf(stderr, "Warning: non-ASCII filename\n"); + return -1; + } + /* no access to the direntry of a read-only file */ + else if (is_short_name(direntries+k) && + (direntries[k].attributes & 1)) { + if (memcmp(direntries + k, + array_get(&(s->directory), dir_index + k), + sizeof(direntry_t))) { + fprintf(stderr, "Warning: tried to write to write-protected file\n"); + return -1; + } + } + } + } + i = mapping->end; + } else + i++; } /* @@ -2920,14 +2920,14 @@ DLOG(checkpoint()); DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); if (ret < 0) { - fprintf(stderr, "Error writing to qcow backend\n"); - return ret; + fprintf(stderr, "Error writing to qcow backend\n"); + return ret; } for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) - if (i >= 0) - s->used_clusters[i] |= USED_ALLOCATED; + i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) + if (i >= 0) + s->used_clusters[i] |= USED_ALLOCATED; DLOG(checkpoint()); /* TODO: add timeout */ @@ -2966,7 +2966,7 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) + int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) { *n = bs->total_sectors - sector_num; if (*n > nb_sectors) { @@ -3145,13 +3145,13 @@ static void checkpoint(void) { assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); #if 0 if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) - fprintf(stderr, "Nonono!\n"); + fprintf(stderr, "Nonono!\n"); mapping_t* mapping; direntry_t* direntry; assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); if (vvv->mapping.next<47) - return; + return; assert((mapping = array_get(&(vvv->mapping), 47))); assert(mapping->dir_index < vvv->directory.next); direntry = array_get(&(vvv->directory), mapping->dir_index); From 5f5b29dfce9676d9d013376800c65af773d8bd9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:55 +0200 Subject: [PATCH 07/40] vvfat: fix typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index e83b8bab5d..07a111fc42 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -403,9 +403,9 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) /* FAT12/FAT16/FAT32 */ /* DOS uses different types when partition is LBA, probably to prevent older versions from using CHS on them */ - partition->fs_type= s->fat_type==12 ? 0x1: - s->fat_type==16 ? (lba?0xe:0x06): - /*fat_tyoe==32*/ (lba?0xc:0x0b); + partition->fs_type = s->fat_type == 12 ? 0x1 : + s->fat_type == 16 ? (lba ? 0xe : 0x06) : + /*s->fat_type == 32*/ (lba ? 
0xc : 0x0b); real_mbr->magic[0]=0x55; real_mbr->magic[1]=0xaa; } @@ -805,7 +805,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) (ROOT_ENTRIES - cur) * sizeof(direntry_t)); } - /* reget the mapping, since s->mapping was possibly realloc()ed */ + /* re-get the mapping, since s->mapping was possibly realloc()ed */ mapping = array_get(&(s->mapping), mapping_index); first_cluster += (s->directory.next - mapping->info.dir.first_dir_index) * 0x20 / s->cluster_size; From ad05b31857b8f21c5ea89ba3e470a58c288f1c36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:56 +0200 Subject: [PATCH 08/40] vvfat: rename useless enumeration values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODE_FAKED and MODE_RENAMED are not and were never used. Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 07a111fc42..18d9559533 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -286,8 +286,7 @@ typedef struct mapping_t { union { /* offset is * - the offset in the file (in clusters) for a file, or - * - the next cluster of the directory for a directory, and - * - the address of the buffer for a faked entry + * - the next cluster of the directory for a directory */ struct { uint32_t offset; @@ -300,9 +299,13 @@ typedef struct mapping_t { /* path contains the full path, i.e. it always starts with s->path */ char* path; - enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2, - MODE_DIRECTORY = 4, MODE_FAKED = 8, - MODE_DELETED = 16, MODE_RENAMED = 32 } mode; + enum { + MODE_UNDEFINED = 0, + MODE_NORMAL = 1, + MODE_MODIFIED = 2, + MODE_DIRECTORY = 4, + MODE_DELETED = 8, + } mode; int read_only; } mapping_t; From 4dc705dc7e96704df6ae3ba22fc3a3f0d0db7608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:57 +0200 Subject: [PATCH 09/40] vvfat: introduce offset_to_bootsector, offset_to_fat and offset_to_root_dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - offset_to_bootsector is the number of sectors up to FAT bootsector - offset_to_fat is the number of sectors up to first File Allocation Table - offset_to_root_dir is the number of sectors up to root directory sector Replace first_sectors_number - 1 by offset_to_bootsector. Replace first_sectors_number by offset_to_fat. Replace faked_sectors by offset_to_rootdir. 
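Purely as an illustration (not part of the patch): a minimal standalone C sketch of how the three offsets relate, using the same arithmetic that init_directories() gains below. The hard-disk case is assumed (offset_to_bootsector == 0x3f) and sectors_per_fat is a made-up value.

    #include <stdio.h>
    #include <inttypes.h>

    int main(void)
    {
        /* mirrors the uint32_t fields added to BDRVVVFATState; values are illustrative */
        uint32_t offset_to_bootsector = 0x3f;   /* 0 for floppy, 0x3f for disk */
        uint32_t sectors_per_fat = 0x100;       /* hypothetical */

        /* one boot sector, then two copies of the FAT, then the root directory */
        uint32_t offset_to_fat = offset_to_bootsector + 1;
        uint32_t offset_to_root_dir = offset_to_fat + sectors_per_fat * 2;

        printf("bootsector %" PRIu32 ", first FAT %" PRIu32 ", root dir %" PRIu32 "\n",
               offset_to_bootsector, offset_to_fat, offset_to_root_dir);
        return 0;
    }

These are the same boundaries the patch makes vvfat_read() check when deciding whether a sector comes from first_sectors, from the in-memory FAT, or from cluster data.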
Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 70 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 18d9559533..88d1879be4 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -319,22 +319,24 @@ static void print_mapping(const struct mapping_t* mapping); typedef struct BDRVVVFATState { CoMutex lock; BlockDriverState* bs; /* pointer to parent */ - unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */ unsigned char first_sectors[0x40*0x200]; int fat_type; /* 16 or 32 */ array_t fat,directory,mapping; char volume_label[11]; + uint32_t offset_to_bootsector; /* 0 for floppy, 0x3f for disk */ + unsigned int cluster_size; unsigned int sectors_per_cluster; unsigned int sectors_per_fat; unsigned int sectors_of_root_directory; uint32_t last_cluster_of_root_directory; - unsigned int faked_sectors; /* how many sectors are faked before file data */ uint32_t sector_count; /* total number of sectors of the partition */ uint32_t cluster_count; /* total number of clusters of this partition */ uint32_t max_fat_value; + uint32_t offset_to_fat; + uint32_t offset_to_root_dir; int current_fd; mapping_t* current_mapping; @@ -393,15 +395,15 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) partition->attributes=0x80; /* bootable */ /* LBA is used when partition is outside the CHS geometry */ - lba = sector2CHS(&partition->start_CHS, s->first_sectors_number - 1, + lba = sector2CHS(&partition->start_CHS, s->offset_to_bootsector, cyls, heads, secs); lba |= sector2CHS(&partition->end_CHS, s->bs->total_sectors - 1, cyls, heads, secs); /*LBA partitions are identified only by start/length_sector_long not by CHS*/ - partition->start_sector_long = cpu_to_le32(s->first_sectors_number - 1); + partition->start_sector_long = cpu_to_le32(s->offset_to_bootsector); partition->length_sector_long = cpu_to_le32(s->bs->total_sectors - - s->first_sectors_number + 1); + - s->offset_to_bootsector); /* FAT12/FAT16/FAT32 */ /* DOS uses different types when partition is LBA, @@ -822,12 +824,12 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) { - return (sector_num-s->faked_sectors)/s->sectors_per_cluster; + return (sector_num - s->offset_to_root_dir) / s->sectors_per_cluster; } static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num) { - return s->faked_sectors + s->sectors_per_cluster * cluster_num; + return s->offset_to_root_dir + s->sectors_per_cluster * cluster_num; } static int init_directories(BDRVVVFATState* s, @@ -854,6 +856,9 @@ static int init_directories(BDRVVVFATState* s, i = 1+s->sectors_per_cluster*0x200*8/s->fat_type; s->sectors_per_fat=(s->sector_count+i)/i; /* round up */ + s->offset_to_fat = s->offset_to_bootsector + 1; + s->offset_to_root_dir = s->offset_to_fat + s->sectors_per_fat * 2; + array_init(&(s->mapping),sizeof(mapping_t)); array_init(&(s->directory),sizeof(direntry_t)); @@ -867,7 +872,6 @@ static int init_directories(BDRVVVFATState* s, /* Now build FAT, and write back information into directory */ init_fat(s); - s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2; s->cluster_count=sector2cluster(s, s->sector_count); mapping = array_get_next(&(s->mapping)); @@ -945,7 +949,8 @@ static int init_directories(BDRVVVFATState* s, s->current_mapping = NULL; - 
bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200); + bootsector = (bootsector_t *)(s->first_sectors + + s->offset_to_bootsector * 0x200); bootsector->jump[0]=0xeb; bootsector->jump[1]=0x3e; bootsector->jump[2]=0x90; @@ -956,16 +961,18 @@ static int init_directories(BDRVVVFATState* s, bootsector->number_of_fats=0x2; /* number of FATs */ bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10); bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count); - bootsector->media_type=(s->first_sectors_number>1?0xf8:0xf0); /* media descriptor (f8=hd, f0=3.5 fd)*/ + /* media descriptor: hard disk=0xf8, floppy=0xf0 */ + bootsector->media_type = (s->offset_to_bootsector > 0 ? 0xf8 : 0xf0); s->fat.pointer[0] = bootsector->media_type; bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat); bootsector->sectors_per_track = cpu_to_le16(secs); bootsector->number_of_heads = cpu_to_le16(heads); - bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f); + bootsector->hidden_sectors = cpu_to_le32(s->offset_to_bootsector); bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0); /* LATER TODO: if FAT32, this is wrong */ - bootsector->u.fat16.drive_number=s->first_sectors_number==1?0:0x80; /* fda=0, hda=0x80 */ + /* drive_number: fda=0, hda=0x80 */ + bootsector->u.fat16.drive_number = s->offset_to_bootsector == 0 ? 0 : 0x80; bootsector->u.fat16.current_head=0; bootsector->u.fat16.signature=0x29; bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); @@ -1122,7 +1129,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, secs = s->fat_type == 12 ? 18 : 36; s->sectors_per_cluster = 1; } - s->first_sectors_number = 1; cyls = 80; heads = 2; } else { @@ -1130,7 +1136,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, if (!s->fat_type) { s->fat_type = 16; } - s->first_sectors_number = 0x40; + s->offset_to_bootsector = 0x3f; cyls = s->fat_type == 12 ? 
64 : 1024; heads = 16; secs = 63; @@ -1166,7 +1172,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, fprintf(stderr, "vvfat %s chs %d,%d,%d\n", dirname, cyls, heads, secs); - s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1); + s->sector_count = cyls * heads * secs - s->offset_to_bootsector; if (qemu_opt_get_bool(opts, "rw", false)) { if (!bdrv_is_read_only(bs)) { @@ -1196,7 +1202,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count; + s->sector_count = s->offset_to_root_dir + + s->sectors_per_cluster * s->cluster_count; /* Disable migration when vvfat is used rw */ if (s->qcow) { @@ -1212,7 +1219,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, } } - if (s->first_sectors_number == 0x40) { + if (s->offset_to_bootsector > 0) { init_mbr(s, cyls, heads, secs); } @@ -1425,15 +1432,24 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, } DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); } - if(sector_numfaked_sectors) { - if(sector_numfirst_sectors_number) - memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200); - else if(sector_num-s->first_sectors_numbersectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200); - else if(sector_num-s->first_sectors_number-s->sectors_per_fatsectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); + if (sector_num < s->offset_to_root_dir) { + if (sector_num < s->offset_to_fat) { + memcpy(buf + i * 0x200, + &(s->first_sectors[sector_num * 0x200]), + 0x200); + } else if (sector_num < s->offset_to_fat + s->sectors_per_fat) { + memcpy(buf + i * 0x200, + &(s->fat.pointer[(sector_num + - s->offset_to_fat) * 0x200]), + 0x200); + } else if (sector_num < s->offset_to_root_dir) { + memcpy(buf + i * 0x200, + &(s->fat.pointer[(sector_num - s->offset_to_fat + - s->sectors_per_fat) * 0x200]), + 0x200); + } } else { - uint32_t sector=sector_num-s->faked_sectors, + uint32_t sector = sector_num - s->offset_to_root_dir, sector_offset_in_cluster=(sector%s->sectors_per_cluster), cluster_num=sector/s->sectors_per_cluster; if(cluster_num > s->cluster_count || read_cluster(s, cluster_num) != 0) { @@ -2045,7 +2061,7 @@ DLOG(checkpoint()); memcpy(s->fat2, s->fat.pointer, size); } check = vvfat_read(s->bs, - s->first_sectors_number, s->fat2, s->sectors_per_fat); + s->offset_to_fat, s->fat2, s->sectors_per_fat); if (check) { fprintf(stderr, "Could not copy fat\n"); return 0; @@ -2864,7 +2880,7 @@ DLOG(checkpoint()); * - do not allow to write non-ASCII filenames */ - if (sector_num < s->first_sectors_number) + if (sector_num < s->offset_to_fat) return -1; for (i = sector2cluster(s, sector_num); From 92e28d82209b0ff49db801e9a56a400cc1f44967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:58 +0200 Subject: [PATCH 10/40] vvfat: fix field names in FAT12/FAT16 and FAT32 boot sectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specification: "FAT: General overview of on-disk format" v1.03, pages 11-13 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 88d1879be4..676cacb76b 100644 --- a/block/vvfat.c +++ 
b/block/vvfat.c @@ -217,23 +217,30 @@ typedef struct bootsector_t { union { struct { uint8_t drive_number; - uint8_t current_head; + uint8_t reserved1; uint8_t signature; uint32_t id; uint8_t volume_label[11]; + uint8_t fat_type[8]; + uint8_t ignored[0x1c0]; } QEMU_PACKED fat16; struct { uint32_t sectors_per_fat; uint16_t flags; uint8_t major,minor; - uint32_t first_cluster_of_root_directory; + uint32_t first_cluster_of_root_dir; uint16_t info_sector; uint16_t backup_boot_sector; - uint16_t ignored; + uint8_t reserved[12]; + uint8_t drive_number; + uint8_t reserved1; + uint8_t signature; + uint32_t id; + uint8_t volume_label[11]; + uint8_t fat_type[8]; + uint8_t ignored[0x1a4]; } QEMU_PACKED fat32; } u; - uint8_t fat_type[8]; - uint8_t ignored[0x1c0]; uint8_t magic[2]; } QEMU_PACKED bootsector_t; @@ -973,13 +980,13 @@ static int init_directories(BDRVVVFATState* s, /* LATER TODO: if FAT32, this is wrong */ /* drive_number: fda=0, hda=0x80 */ bootsector->u.fat16.drive_number = s->offset_to_bootsector == 0 ? 0 : 0x80; - bootsector->u.fat16.current_head=0; bootsector->u.fat16.signature=0x29; bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); memcpy(bootsector->u.fat16.volume_label, s->volume_label, sizeof(bootsector->u.fat16.volume_label)); - memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12 ":s->fat_type==16?"FAT16 ":"FAT32 "),8); + memcpy(bootsector->u.fat16.fat_type, + s->fat_type == 12 ? "FAT12 " : "FAT16 ", 8); bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa; return 0; From f82d92bb028a1d674bab4ccc7e6cde6c04956230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:11:59 +0200 Subject: [PATCH 11/40] vvfat: always create . and .. entries at first and in that order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readdir() doesn't always return . and .. entries at first and in that order. This leads to not creating them at first in the directory, which raises some errors on file system checking utilities like MS-DOS Scandisk. Specification: "FAT: General overview of on-disk format" v1.03, page 25 Fixes: https://bugs.launchpad.net/qemu/+bug/1599539 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 676cacb76b..0c6d0f407f 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -727,6 +727,12 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) i = mapping->info.dir.first_dir_index = first_cluster == 0 ? 0 : s->directory.next; + if (first_cluster != 0) { + /* create the top entries of a subdirectory */ + (void)create_short_and_long_name(s, i, ".", 1); + (void)create_short_and_long_name(s, i, "..", 1); + } + /* actually read the directory, and allocate the mappings */ while((entry=readdir(dir))) { unsigned int length=strlen(dirname)+2+strlen(entry->d_name); @@ -748,8 +754,11 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) } /* create directory entry for this file */ - direntry=create_short_and_long_name(s, i, entry->d_name, - is_dot || is_dotdot); + if (!is_dot && !is_dotdot) { + direntry = create_short_and_long_name(s, i, entry->d_name, 0); + } else { + direntry = array_get(&(s->directory), is_dot ? 
i : i + 1); + } direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); direntry->reserved[0]=direntry->reserved[1]=0; direntry->ctime=fat_datetime(st.st_ctime,1); From 09ec4119fb5a48f6783c23e275e698d977a11ca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:00 +0200 Subject: [PATCH 12/40] vvfat: correctly create long names for non-ASCII filenames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assume that input filename is encoded as UTF-8, so correctly create UTF-16 encoding. Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 0c6d0f407f..c52c9ba914 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -424,28 +424,19 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) /* direntry functions */ -/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next multiple of 26 */ -static inline int short2long_name(char* dest,const char* src) +static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename) { - int i; - int len; - for(i=0;i<129 && src[i];i++) { - dest[2*i]=src[i]; - dest[2*i+1]=0; - } - len=2*i; - dest[2*i]=dest[2*i+1]=0; - for(i=2*i+2;(i%26);i++) - dest[i]=0xff; - return len; -} + int number_of_entries, i; + glong length; + direntry_t *entry; -static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename) -{ - char buffer[258]; - int length=short2long_name(buffer,filename), - number_of_entries=(length+25)/26,i; - direntry_t* entry; + gunichar2 *longname = g_utf8_to_utf16(filename, -1, NULL, &length, NULL); + if (!longname) { + fprintf(stderr, "vvfat: invalid UTF-8 name: %s\n", filename); + return NULL; + } + + number_of_entries = (length * 2 + 25) / 26; for(i=0;idirectory)); @@ -460,8 +451,15 @@ static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* fil else if(offset<22) offset=14+offset-10; else offset=28+offset-22; entry=array_get(&(s->directory),s->directory.next-1-(i/26)); - entry->name[offset]=buffer[i]; + if (i >= 2 * length + 2) { + entry->name[offset] = 0xff; + } else if (i % 2 == 0) { + entry->name[offset] = longname[i / 2] & 0xff; + } else { + entry->name[offset] = longname[i / 2] >> 8; + } } + g_free(longname); return array_get(&(s->directory),s->directory.next-number_of_entries); } From 0c36111f57ec2188f679e7fa810291b7386bdca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:01 +0200 Subject: [PATCH 13/40] vvfat: correctly create base short names for non-ASCII filenames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More specifically, create short name from filename and change blacklist of invalid chars to whitelist of valid chars. Windows 9x also now correctly see long file names of filenames containing a space, but Scandisk still complains about mismatch between SFN and LFN. 
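
As a standalone illustration of the 8.3 layout this produces (plain ASCII-only C, not code from the patch: it shows only the 11-byte base+extension field with space padding, and leaves out the valid-character whitelist and UTF-8 handling that the patch actually adds):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Map a filename onto the 11-byte short-name field: up to 8 characters
     * of the base name, then up to 3 characters of the extension, both
     * uppercased and padded with spaces. */
    static void short_name_83(const char *filename, char out[11])
    {
        const char *dot = strrchr(filename, '.');
        int i, j = 0;

        memset(out, ' ', 11);
        for (i = 0; filename[i] && &filename[i] != dot && j < 8; i++) {
            out[j++] = toupper((unsigned char)filename[i]);
        }
        if (dot) {
            for (i = 1, j = 8; dot[i] && j < 11; i++) {
                out[j++] = toupper((unsigned char)dot[i]);
            }
        }
    }

    int main(void)
    {
        char name[11];
        short_name_83("longfilename.jpeg", name);
        printf("%.11s\n", name);    /* prints LONGFILEJPE */
        return 0;
    }
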
[kwolf: Build fix for this intermediate patch (it included declarations for variables that are only used in the next patch) ] Specification: "FAT: General overview of on-disk format" v1.03, pages 30-31 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 104 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 28 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index c52c9ba914..2125ddb480 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -515,6 +515,80 @@ static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin) direntry->begin_hi = cpu_to_le16((begin >> 16) & 0xffff); } +static uint8_t to_valid_short_char(gunichar c) +{ + c = g_unichar_toupper(c); + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + strchr("$%'-_@~`!(){}^#&", c) != 0) { + return c; + } else { + return 0; + } +} + +static direntry_t *create_short_filename(BDRVVVFATState *s, + const char *filename) +{ + int j = 0; + direntry_t *entry = array_get_next(&(s->directory)); + const gchar *p, *last_dot = NULL; + gunichar c; + bool lossy_conversion = false; + + if (!entry) { + return NULL; + } + memset(entry->name, 0x20, sizeof(entry->name)); + + /* copy filename and search last dot */ + for (p = filename; ; p = g_utf8_next_char(p)) { + c = g_utf8_get_char(p); + if (c == '\0') { + break; + } else if (c == '.') { + if (j == 0) { + /* '.' at start of filename */ + lossy_conversion = true; + } else { + if (last_dot) { + lossy_conversion = true; + } + last_dot = p; + } + } else if (!last_dot) { + /* first part of the name; copy it */ + uint8_t v = to_valid_short_char(c); + if (j < 8 && v) { + entry->name[j++] = v; + } else { + lossy_conversion = true; + } + } + } + + /* copy extension (if any) */ + if (last_dot) { + j = 0; + for (p = g_utf8_next_char(last_dot); ; p = g_utf8_next_char(p)) { + c = g_utf8_get_char(p); + if (c == '\0') { + break; + } else { + /* extension; copy it */ + uint8_t v = to_valid_short_char(c); + if (j < 3 && v) { + entry->name[8 + (j++)] = v; + } else { + lossy_conversion = true; + } + } + } + } + (void)lossy_conversion; + return entry; +} + /* fat functions */ static inline uint8_t fat_chksum(const direntry_t* entry) @@ -613,7 +687,7 @@ static inline void init_fat(BDRVVVFATState* s) static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, unsigned int directory_start, const char* filename, int is_dot) { - int i,j,long_index=s->directory.next; + int long_index = s->directory.next; direntry_t* entry = NULL; direntry_t* entry_long = NULL; @@ -625,33 +699,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, } entry_long=create_long_filename(s,filename); - - i = strlen(filename); - for(j = i - 1; j>0 && filename[j]!='.';j--); - if (j > 0) - i = (j > 8 ? 
8 : j); - else if (i > 8) - i = 8; - - entry=array_get_next(&(s->directory)); - memset(entry->name, 0x20, sizeof(entry->name)); - memcpy(entry->name, filename, i); - - if (j > 0) { - for (i = 0; i < 3 && filename[j + 1 + i]; i++) { - entry->name[8 + i] = filename[j + 1 + i]; - } - } - - /* upcase & remove unwanted characters */ - for(i=10;i>=0;i--) { - if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); - if(entry->name[i]<=' ' || entry->name[i]>0x7f - || strchr(".*?<>|\":/\\[];,+='",entry->name[i])) - entry->name[i]='_'; - else if(entry->name[i]>='a' && entry->name[i]<='z') - entry->name[i]+='A'-'a'; - } + entry = create_short_filename(s, filename); /* mangle duplicates */ while(1) { From 339cebcc019da9f0e9aa53ca0dac852853abc7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:02 +0200 Subject: [PATCH 14/40] vvfat: correctly generate numeric-tail of short file names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More specifically: - try without numeric-tail only if LFN didn't have invalid short chars - start at ~1 (instead of ~0) - handle case if numeric tail is more than one char (ie > 10) Windows 9x Scandisk doesn't see anymore mismatches between short file names and long file names for non-ASCII filenames. Specification: "FAT: General overview of on-disk format" v1.03, page 31 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 65 ++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 2125ddb480..62f411b98c 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -528,13 +528,15 @@ static uint8_t to_valid_short_char(gunichar c) } static direntry_t *create_short_filename(BDRVVVFATState *s, - const char *filename) + const char *filename, + unsigned int directory_start) { - int j = 0; + int i, j = 0; direntry_t *entry = array_get_next(&(s->directory)); const gchar *p, *last_dot = NULL; gunichar c; bool lossy_conversion = false; + char tail[11]; if (!entry) { return NULL; @@ -585,8 +587,32 @@ static direntry_t *create_short_filename(BDRVVVFATState *s, } } } - (void)lossy_conversion; - return entry; + + /* numeric-tail generation */ + for (j = 0; j < 8; j++) { + if (entry->name[j] == ' ') { + break; + } + } + for (i = lossy_conversion ? 
1 : 0; i < 999999; i++) { + direntry_t *entry1; + if (i > 0) { + int len = sprintf(tail, "~%d", i); + memcpy(entry->name + MIN(j, 8 - len), tail, len); + } + for (entry1 = array_get(&(s->directory), directory_start); + entry1 < entry; entry1++) { + if (!is_long_name(entry1) && + !memcmp(entry1->name, entry->name, 11)) { + break; /* found dupe */ + } + } + if (entry1 == entry) { + /* no dupe found */ + return entry; + } + } + return NULL; } /* fat functions */ @@ -699,36 +725,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, } entry_long=create_long_filename(s,filename); - entry = create_short_filename(s, filename); - - /* mangle duplicates */ - while(1) { - direntry_t* entry1=array_get(&(s->directory),directory_start); - int j; - - for(;entry1name,entry->name,11)) - break; /* found dupe */ - if(entry1==entry) /* no dupe found */ - break; - - /* use all 8 characters of name */ - if(entry->name[7]==' ') { - int j; - for(j=6;j>0 && entry->name[j]==' ';j--) - entry->name[j]='~'; - } - - /* increment number */ - for(j=7;j>0 && entry->name[j]=='9';j--) - entry->name[j]='0'; - if(j>0) { - if(entry->name[j]<'0' || entry->name[j]>'9') - entry->name[j]='0'; - else - entry->name[j]++; - } - } + entry = create_short_filename(s, filename, directory_start); /* calculate checksum; propagate to long name */ if(entry_long) { From 6817efea3a0d1bf87be815970cdb014c5a64b628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:03 +0200 Subject: [PATCH 15/40] vvfat: limit number of entries in root directory in FAT12/FAT16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FAT12/FAT16 root directory is two sectors in size, which allows only 512 directory entries. Prevent QEMU startup if too much files exist, instead of overflowing root directory. Also introduce variable root_entries, which will be required for FAT32. Fixes: https://bugs.launchpad.net/qemu/+bug/1599539/comments/4 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 62f411b98c..dc9af01167 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -337,8 +337,9 @@ typedef struct BDRVVVFATState { unsigned int cluster_size; unsigned int sectors_per_cluster; unsigned int sectors_per_fat; - unsigned int sectors_of_root_directory; uint32_t last_cluster_of_root_directory; + /* how many entries are available in root directory (0 for FAT32) */ + uint16_t root_entries; uint32_t sector_count; /* total number of sectors of the partition */ uint32_t cluster_count; /* total number of clusters of this partition */ uint32_t max_fat_value; @@ -785,6 +786,12 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) int is_dot=!strcmp(entry->d_name,"."); int is_dotdot=!strcmp(entry->d_name,".."); + if (first_cluster == 0 && s->directory.next >= s->root_entries - 1) { + fprintf(stderr, "Too many entries in root directory\n"); + closedir(dir); + return -2; + } + if(first_cluster == 0 && (is_dotdot || is_dot)) continue; @@ -858,15 +865,15 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) memset(direntry,0,sizeof(direntry_t)); } -/* TODO: if there are more entries, bootsector has to be adjusted! 
*/ -#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster) - if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) { + if (s->fat_type != 32 && + mapping_index == 0 && + s->directory.next < s->root_entries) { /* root directory */ int cur = s->directory.next; - array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1); - s->directory.next = ROOT_ENTRIES; + array_ensure_allocated(&(s->directory), s->root_entries - 1); + s->directory.next = s->root_entries; memset(array_get(&(s->directory), cur), 0, - (ROOT_ENTRIES - cur) * sizeof(direntry_t)); + (s->root_entries - cur) * sizeof(direntry_t)); } /* re-get the mapping, since s->mapping was possibly realloc()ed */ @@ -931,6 +938,8 @@ static int init_directories(BDRVVVFATState* s, /* Now build FAT, and write back information into directory */ init_fat(s); + /* TODO: if there are more entries, bootsector has to be adjusted! */ + s->root_entries = 0x02 * 0x10 * s->sectors_per_cluster; s->cluster_count=sector2cluster(s, s->sector_count); mapping = array_get_next(&(s->mapping)); @@ -999,7 +1008,6 @@ static int init_directories(BDRVVVFATState* s, } mapping = array_get(&(s->mapping), 0); - s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster; s->last_cluster_of_root_directory = mapping->end; /* the FAT signature */ @@ -1018,7 +1026,7 @@ static int init_directories(BDRVVVFATState* s, bootsector->sectors_per_cluster=s->sectors_per_cluster; bootsector->reserved_sectors=cpu_to_le16(1); bootsector->number_of_fats=0x2; /* number of FATs */ - bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10); + bootsector->root_entries = cpu_to_le16(s->root_entries); bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count); /* media descriptor: hard disk=0xf8, floppy=0xf0 */ bootsector->media_type = (s->offset_to_bootsector > 0 ? 0xf8 : 0xf0); From 78f002c901a89bebbf7337c1797e3d9e8249309c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:04 +0200 Subject: [PATCH 16/40] vvfat: handle KANJI lead byte 0xe5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specification: "FAT: General overview of on-disk format" v1.03, page 23 Signed-off-by: HervĂ© Poussineau Signed-off-by: Kevin Wolf --- block/vvfat.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index dc9af01167..9cb48ef6ac 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -589,6 +589,10 @@ static direntry_t *create_short_filename(BDRVVVFATState *s, } } + if (entry->name[0] == 0xe5) { + entry->name[0] = 0x05; + } + /* numeric-tail generation */ for (j = 0; j < 8; j++) { if (entry->name[j] == ' ') { @@ -709,8 +713,6 @@ static inline void init_fat(BDRVVVFATState* s) } -/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */ -/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! 
*/ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, unsigned int directory_start, const char* filename, int is_dot) { @@ -1743,6 +1745,9 @@ static int parse_short_name(BDRVVVFATState* s, } else lfn->name[i + j + 1] = '\0'; + if (lfn->name[0] == 0x05) { + lfn->name[0] = 0xe5; + } lfn->len = strlen((char*)lfn->name); return 0; From 8b544293ef855b41ea91d7bc361d5340e7fda902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Mon, 22 May 2017 23:12:05 +0200 Subject: [PATCH 17/40] vvfat: change OEM name to 'MSWIN4.1' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to specification: "'MSWIN4.1' is the recommanded setting, because it is the setting least likely to cause compatibility problems. If you want to put something else in here, that is your option, but the result may be that some FAT drivers might not recognize the volume." Specification: "FAT: General overview of on-disk format" v1.03, page 9 Signed-off-by: HervĂ© Poussineau Reviewed-by: Philippe Mathieu-DaudĂ© Signed-off-by: Kevin Wolf --- block/vvfat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/vvfat.c b/block/vvfat.c index 9cb48ef6ac..f55104c338 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1023,7 +1023,7 @@ static int init_directories(BDRVVVFATState* s, bootsector->jump[0]=0xeb; bootsector->jump[1]=0x3e; bootsector->jump[2]=0x90; - memcpy(bootsector->name,"QEMU ",8); + memcpy(bootsector->name, "MSWIN4.1", 8); bootsector->sector_size=cpu_to_le16(0x200); bootsector->sectors_per_cluster=s->sectors_per_cluster; bootsector->reserved_sectors=cpu_to_le16(1); From 6b4df5483333fc6ad950aadba8194f04ff450197 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 4 Jul 2017 13:30:09 +0100 Subject: [PATCH 18/40] qemu-img: drop -e and -6 options from the 'create' & 'convert' commands The '-e' and '-6' options to the 'create' & 'convert' commands were "deprecated" in favour of the more generic '-o' option many years ago: commit eec77d9e712bd4157a4e1c0b5a9249d168add738 Author: Jes Sorensen Date: Tue Dec 7 17:44:34 2010 +0100 qemu-img: Deprecate obsolete -6 and -e options Except this was never actually a deprecation, which would imply giving the user a warning while the functionality continues to work for a number of releases before eventual removal. Instead the options were immediately turned into an error + exit. Given that the functionality is already broken, there's no point in keeping these psuedo-deprecation messages around any longer. Signed-off-by: Daniel P. 
Berrange Signed-off-by: Kevin Wolf --- qemu-img.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 91ad6bebbf..c5f00db050 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -464,7 +464,7 @@ static int img_create(int argc, char **argv) {"object", required_argument, 0, OPTION_OBJECT}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":F:b:f:he6o:q", + c = getopt_long(argc, argv, ":F:b:f:ho:q", long_options, NULL); if (c == -1) { break; @@ -488,14 +488,6 @@ static int img_create(int argc, char **argv) case 'f': fmt = optarg; break; - case 'e': - error_report("option -e is deprecated, please use \'-o " - "encryption\' instead!"); - goto fail; - case '6': - error_report("option -6 is deprecated, please use \'-o " - "compat6\' instead!"); - goto fail; case 'o': if (!is_valid_option_list(optarg)) { error_report("Invalid option list: %s", optarg); @@ -1985,7 +1977,7 @@ static int img_convert(int argc, char **argv) {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU", + c = getopt_long(argc, argv, ":hf:O:B:co:s:l:S:pt:T:qnm:WU", long_options, NULL); if (c == -1) { break; @@ -2012,14 +2004,6 @@ static int img_convert(int argc, char **argv) case 'c': s.compressed = true; break; - case 'e': - error_report("option -e is deprecated, please use \'-o " - "encryption\' instead!"); - goto fail_getopt; - case '6': - error_report("option -6 is deprecated, please use \'-o " - "compat6\' instead!"); - goto fail_getopt; case 'o': if (!is_valid_option_list(optarg)) { error_report("Invalid option list: %s", optarg); From c616f16e0c9a2d0b2f13785d37ca0f18d54d571f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 12 May 2017 12:33:49 +0200 Subject: [PATCH 19/40] blockdev: Print a warning for legacy drive options that belong to -device We likely do not want to carry these legacy -drive options along forever. Let's emit a deprecation warning for the -drive options that have a replacement with the -device option, so that the (hopefully few) remaining users are aware of this and can adapt their scripts / behaviour accordingly. 
Signed-off-by: Thomas Huth Reviewed-by: Markus Armbruster Signed-off-by: Kevin Wolf --- blockdev.c | 14 ++++++++++++++ qemu-options.hx | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index f92dcf24bf..e2016b6f37 100644 --- a/blockdev.c +++ b/blockdev.c @@ -50,6 +50,7 @@ #include "qmp-commands.h" #include "block/trace.h" #include "sysemu/arch_init.h" +#include "sysemu/qtest.h" #include "qemu/cutils.h" #include "qemu/help_option.h" #include "qemu/throttle-options.h" @@ -798,6 +799,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) const char *filename; Error *local_err = NULL; int i; + const char *deprecated[] = { + "serial", "trans", "secs", "heads", "cyls", "addr" + }; /* Change legacy command line options into QMP ones */ static const struct { @@ -881,6 +885,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) "update your scripts.\n"); } + /* Other deprecated options */ + if (!qtest_enabled()) { + for (i = 0; i < ARRAY_SIZE(deprecated); i++) { + if (qemu_opt_get(legacy_opts, deprecated[i]) != NULL) { + error_report("'%s' is deprecated, please use the corresponding " + "option of '-device' instead", deprecated[i]); + } + } + } + /* Media type */ value = qemu_opt_get(legacy_opts, "media"); if (value) { diff --git a/qemu-options.hx b/qemu-options.hx index 297bd8aca4..ddab656eb3 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -818,6 +818,8 @@ of available connectors of a given interface type. This option defines the type of the media: disk or cdrom. @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] These options have the same definition as they have in @option{-hdachs}. +These parameters are deprecated, use the corresponding parameters +of @code{-device} instead. @item snapshot=@var{snapshot} @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive (see @option{-snapshot}). @@ -852,9 +854,12 @@ Specify which disk @var{format} will be used rather than detecting the format. Can be used to specify format=raw to avoid interpreting an untrusted format header. @item serial=@var{serial} -This option specifies the serial number to assign to the device. +This option specifies the serial number to assign to the device. This +parameter is deprecated, use the corresponding parameter of @code{-device} +instead. @item addr=@var{addr} -Specify the controller's PCI address (if=virtio only). +Specify the controller's PCI address (if=virtio only). This parameter is +deprecated, use the corresponding parameter of @code{-device} instead. @item werror=@var{action},rerror=@var{action} Specify which @var{action} to take on write and read errors. Valid actions are: "ignore" (ignore the error and try to continue), "stop" (pause QEMU), From f3e4ce4af336f2ea306fa0f40ec1a5149864ca8c Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:39 -0500 Subject: [PATCH 20/40] blockjob: Track job ratelimits via bytes, not sectors The user interface specifies job rate limits in bytes/second. It's pointless to have our internal representation track things in sectors/second, particularly since we want to move away from sector-based interfaces. Fix up a doc typo found while verifying that the ratelimit code handles the scaling difference. Repetition of expressions like 'n * BDRV_SECTOR_SIZE' will be cleaned up later when functions are converted to iterate over images by bytes rather than by sectors. 
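
A condensed sketch of the convention after this patch (the names s->limit, s->common, n and the speed parameter are the ones used by the block jobs in the hunks below): both the configured speed and the accounted amount are now plain byte counts.

    /* in a job's set_speed callback: speed arrives in bytes/second and is
     * stored without any sector scaling */
    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);

    /* in the copy loop: account the bytes just processed; n is still a
     * sector count here, so it is scaled exactly once at this boundary */
    uint64_t delay_ns = ratelimit_calculate_delay(&s->limit,
                                                  n * BDRV_SECTOR_SIZE);
    if (delay_ns) {
        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
    }
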
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 5 +++-- block/commit.c | 5 +++-- block/mirror.c | 13 +++++++------ block/stream.c | 5 +++-- include/qemu/ratelimit.h | 3 ++- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/block/backup.c b/block/backup.c index 5387fbd84e..9ca1d8ec88 100644 --- a/block/backup.c +++ b/block/backup.c @@ -208,7 +208,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) error_setg(errp, QERR_INVALID_PARAMETER, "speed"); return; } - ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); + ratelimit_set_speed(&s->limit, speed, SLICE_TIME); } static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) @@ -359,7 +359,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job) */ if (job->common.speed) { uint64_t delay_ns = ratelimit_calculate_delay(&job->limit, - job->sectors_read); + job->sectors_read * + BDRV_SECTOR_SIZE); job->sectors_read = 0; block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns); } else { diff --git a/block/commit.c b/block/commit.c index 524bd54946..6993994a13 100644 --- a/block/commit.c +++ b/block/commit.c @@ -209,7 +209,8 @@ static void coroutine_fn commit_run(void *opaque) s->common.offset += n * BDRV_SECTOR_SIZE; if (copy && s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, n); + delay_ns = ratelimit_calculate_delay(&s->limit, + n * BDRV_SECTOR_SIZE); } } @@ -231,7 +232,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) error_setg(errp, QERR_INVALID_PARAMETER, "speed"); return; } - ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); + ratelimit_set_speed(&s->limit, speed, SLICE_TIME); } static const BlockJobDriver commit_job_driver = { diff --git a/block/mirror.c b/block/mirror.c index 61a862dcf3..eb27efc62f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -396,7 +396,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { int64_t ret; - int io_sectors, io_sectors_acct; + int io_sectors; + int64_t io_bytes_acct; BlockDriverState *file; enum MirrorMethod { MIRROR_METHOD_COPY, @@ -444,16 +445,16 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) switch (mirror_method) { case MIRROR_METHOD_COPY: io_sectors = mirror_do_read(s, sector_num, io_sectors); - io_sectors_acct = io_sectors; + io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; break; case MIRROR_METHOD_ZERO: case MIRROR_METHOD_DISCARD: mirror_do_zero_or_discard(s, sector_num, io_sectors, mirror_method == MIRROR_METHOD_DISCARD); if (write_zeroes_ok) { - io_sectors_acct = 0; + io_bytes_acct = 0; } else { - io_sectors_acct = io_sectors; + io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; } break; default: @@ -463,7 +464,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) sector_num += io_sectors; nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk); if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct); + delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct); } } return delay_ns; @@ -929,7 +930,7 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) error_setg(errp, QERR_INVALID_PARAMETER, "speed"); return; } - ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); + ratelimit_set_speed(&s->limit, speed, 
SLICE_TIME); } static void mirror_complete(BlockJob *job, Error **errp) diff --git a/block/stream.c b/block/stream.c index 52d329f5c6..29273a5d23 100644 --- a/block/stream.c +++ b/block/stream.c @@ -191,7 +191,8 @@ static void coroutine_fn stream_run(void *opaque) /* Publish progress */ s->common.offset += n * BDRV_SECTOR_SIZE; if (copy && s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, n); + delay_ns = ratelimit_calculate_delay(&s->limit, + n * BDRV_SECTOR_SIZE); } } @@ -220,7 +221,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) error_setg(errp, QERR_INVALID_PARAMETER, "speed"); return; } - ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); + ratelimit_set_speed(&s->limit, speed, SLICE_TIME); } static const BlockJobDriver stream_job_driver = { diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h index 8da1232574..8dece483f5 100644 --- a/include/qemu/ratelimit.h +++ b/include/qemu/ratelimit.h @@ -24,7 +24,8 @@ typedef struct { /** Calculate and return delay for next request in ns * - * Record that we sent @p n data units. If we may send more data units + * Record that we sent @n data units (where @n matches the scale chosen + * during ratelimit_set_speed). If we may send more data units * in the current time slice, return 0 (i.e. no delay). Otherwise * return the amount of time (in ns) until the start of the next time * slice that will permit sending the next chunk of data. From 5cb1a49e01db12a38790c982aafde94977f7b0ee Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:40 -0500 Subject: [PATCH 21/40] trace: Show blockjob actions via bytes, not sectors Upcoming patches are going to switch to byte-based interfaces instead of sector-based. Even worse, trace_backup_do_cow_enter() had a weird mix of cluster and sector indices. The trace interface is low enough that there are no stability guarantees, and therefore nothing wrong with changing our units, even in cases like trace_backup_do_cow_skip() where we are not changing the trace output. So make the tracing uniformly use bytes. 
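
The call-site pattern is mechanical, as in this sketch taken from the backup.c hunks below (start is a cluster index, bytes_per_cluster the scaling factor introduced there):

    /* before: trace the raw cluster index */
    trace_backup_do_cow_skip(job, start);

    /* after: scale to a byte offset at the call site */
    trace_backup_do_cow_skip(job, start * bytes_per_cluster);
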
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 16 ++++++++++------ block/commit.c | 3 ++- block/mirror.c | 26 +++++++++++++++++--------- block/stream.c | 3 ++- block/trace-events | 14 +++++++------- 5 files changed, 38 insertions(+), 24 deletions(-) diff --git a/block/backup.c b/block/backup.c index 9ca1d8ec88..06431ac79e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -102,6 +102,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, void *bounce_buffer = NULL; int ret = 0; int64_t sectors_per_cluster = cluster_size_sectors(job); + int64_t bytes_per_cluster = sectors_per_cluster * BDRV_SECTOR_SIZE; int64_t start, end; int n; @@ -110,18 +111,20 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, start = sector_num / sectors_per_cluster; end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); - trace_backup_do_cow_enter(job, start, sector_num, nb_sectors); + trace_backup_do_cow_enter(job, start * bytes_per_cluster, + sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); wait_for_overlapping_requests(job, start, end); cow_request_begin(&cow_request, job, start, end); for (; start < end; start++) { if (test_bit(start, job->done_bitmap)) { - trace_backup_do_cow_skip(job, start); + trace_backup_do_cow_skip(job, start * bytes_per_cluster); continue; /* already copied */ } - trace_backup_do_cow_process(job, start); + trace_backup_do_cow_process(job, start * bytes_per_cluster); n = MIN(sectors_per_cluster, job->common.len / BDRV_SECTOR_SIZE - @@ -138,7 +141,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, bounce_qiov.size, &bounce_qiov, is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); if (ret < 0) { - trace_backup_do_cow_read_fail(job, start, ret); + trace_backup_do_cow_read_fail(job, start * bytes_per_cluster, ret); if (error_is_read) { *error_is_read = true; } @@ -154,7 +157,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, job->compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0); } if (ret < 0) { - trace_backup_do_cow_write_fail(job, start, ret); + trace_backup_do_cow_write_fail(job, start * bytes_per_cluster, ret); if (error_is_read) { *error_is_read = false; } @@ -177,7 +180,8 @@ out: cow_request_end(&cow_request); - trace_backup_do_cow_return(job, sector_num, nb_sectors, ret); + trace_backup_do_cow_return(job, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, ret); qemu_co_rwlock_unlock(&job->flush_rwlock); diff --git a/block/commit.c b/block/commit.c index 6993994a13..4cda7f2425 100644 --- a/block/commit.c +++ b/block/commit.c @@ -190,7 +190,8 @@ static void coroutine_fn commit_run(void *opaque) COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); copy = (ret == 1); - trace_commit_one_iteration(s, sector_num, n, ret); + trace_commit_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, ret); if (copy) { ret = commit_populate(s->top, s->base, sector_num, n, buf); bytes_written += n * BDRV_SECTOR_SIZE; diff --git a/block/mirror.c b/block/mirror.c index eb27efc62f..b4dfe952de 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -103,7 +103,8 @@ static void mirror_iteration_done(MirrorOp *op, int ret) int64_t chunk_num; int i, nb_chunks, sectors_per_chunk; - trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret); + trace_mirror_iteration_done(s, op->sector_num * BDRV_SECTOR_SIZE, + op->nb_sectors * BDRV_SECTOR_SIZE, ret); s->in_flight--; s->sectors_in_flight -= op->nb_sectors; @@ -268,7 +269,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk); while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, + s->in_flight); mirror_wait_for_io(s); } @@ -294,7 +296,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* Copy the dirty cluster. 
*/ s->in_flight++; s->sectors_in_flight += nb_sectors; - trace_mirror_one_iteration(s, sector_num, nb_sectors); + trace_mirror_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0, mirror_read_complete, op); @@ -347,14 +350,16 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) if (sector_num < 0) { bdrv_set_dirty_iter(s->dbi, 0); sector_num = bdrv_dirty_iter_next(s->dbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * + BDRV_SECTOR_SIZE); assert(sector_num >= 0); } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); first_chunk = sector_num / sectors_per_chunk; while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, + s->in_flight); mirror_wait_for_io(s); } @@ -433,7 +438,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, + s->in_flight); mirror_wait_for_io(s); } @@ -823,7 +829,8 @@ static void coroutine_fn mirror_run(void *opaque) s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); + trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE, + s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } else if (cnt != 0) { @@ -864,7 +871,7 @@ static void coroutine_fn mirror_run(void *opaque) * whether to switch to target check one last time if I/O has * come in the meanwhile, and if not flush the data to disk. 
*/ - trace_mirror_before_drain(s, cnt); + trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE); bdrv_drained_begin(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); @@ -883,7 +890,8 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); + trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE, + s->synced, delay_ns); if (!s->synced) { block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/stream.c b/block/stream.c index 29273a5d23..6cb393972b 100644 --- a/block/stream.c +++ b/block/stream.c @@ -168,7 +168,8 @@ static void coroutine_fn stream_run(void *opaque) copy = (ret == 1); } - trace_stream_one_iteration(s, sector_num, n, ret); + trace_stream_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, ret); if (copy) { ret = stream_populate(blk, sector_num, n, buf); } diff --git a/block/trace-events b/block/trace-events index 752de6a054..4a4df25323 100644 --- a/block/trace-events +++ b/block/trace-events @@ -15,11 +15,11 @@ bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p off bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u" # block/stream.c -stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" stream_start(void *bs, void *base, void *s) "bs %p base %p s %p" # block/commit.c -commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +commit_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p" # block/mirror.c @@ -28,14 +28,14 @@ mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_flush(void *s) "s %p" mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns" -mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" -mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d" +mirror_one_iteration(void *s, int64_t offset, uint64_t bytes) "s %p offset %" PRId64 " bytes %" PRIu64 +mirror_iteration_done(void *s, int64_t offset, uint64_t bytes, int ret) "s %p offset %" PRId64 " bytes %" PRIu64 " ret %d" mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d" -mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d" +mirror_yield_in_flight(void *s, int64_t offset, int in_flight) "s %p offset %" PRId64 " in_flight %d" # block/backup.c -backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d" -backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d" 
+backup_do_cow_enter(void *job, int64_t start, int64_t offset, uint64_t bytes) "job %p start %" PRId64 " offset %" PRId64 " bytes %" PRIu64 +backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p offset %" PRId64 " bytes %" PRIu64 " ret %d" backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d" From 8493211c02bbc74d1ae051422f90bbfd20debb5e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:41 -0500 Subject: [PATCH 22/40] stream: Switch stream_populate() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Start by converting an internal function (no semantic change). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/stream.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/stream.c b/block/stream.c index 6cb393972b..746d525cac 100644 --- a/block/stream.c +++ b/block/stream.c @@ -41,20 +41,20 @@ typedef struct StreamBlockJob { } StreamBlockJob; static int coroutine_fn stream_populate(BlockBackend *blk, - int64_t sector_num, int nb_sectors, + int64_t offset, uint64_t bytes, void *buf) { struct iovec iov = { .iov_base = buf, - .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + .iov_len = bytes, }; QEMUIOVector qiov; + assert(bytes < SIZE_MAX); qemu_iovec_init_external(&qiov, &iov, 1); /* Copy-on-read the unallocated clusters */ - return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov, - BDRV_REQ_COPY_ON_READ); + return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); } typedef struct { @@ -171,7 +171,8 @@ static void coroutine_fn stream_run(void *opaque) trace_stream_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE, ret); if (copy) { - ret = stream_populate(blk, sector_num, n, buf); + ret = stream_populate(blk, sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, buf); } if (ret < 0) { BlockErrorAction action = From 158c6492571c82c5632070c7ccee36b3dffd3ca9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:42 -0500 Subject: [PATCH 23/40] stream: Drop reached_end for stream_complete() stream_complete() skips the work of rewriting the backing file if the job was cancelled, if data->reached_end is false, or if there was an error detected (non-zero data->ret) during the streaming. But note that in stream_run(), data->reached_end is only set if the loop ran to completion, and data->ret is only 0 in two cases: either the loop ran to completion (possibly by cancellation, but stream_complete checks for that), or we took an early goto out because there is no bs->backing. Thus, we can preserve the same semantics without the use of reached_end, by merely checking for bs->backing (and logically, if there was no backing file, streaming is a no-op, so there is no backing file to rewrite). 
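
The argument can be summarised next to the new condition (the comment is a reviewer-style sketch; only the if-condition itself comes from the patch):

    /* data->ret == 0 can only happen in two ways:
     *   (a) the copy loop ran to completion (the old reached_end case), or
     *   (b) stream_run() took the early exit because bs->backing was NULL.
     * Case (b) is excluded by the bs->backing test, so this is equivalent
     * to the old "reached_end && data->ret == 0" check. */
    if (!block_job_is_cancelled(&s->common) && bs->backing && data->ret == 0) {
        /* safe to rewrite the backing file pointer */
    }
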
Suggested-by: Kevin Wolf Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/stream.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/stream.c b/block/stream.c index 746d525cac..12f1659942 100644 --- a/block/stream.c +++ b/block/stream.c @@ -59,7 +59,6 @@ static int coroutine_fn stream_populate(BlockBackend *blk, typedef struct { int ret; - bool reached_end; } StreamCompleteData; static void stream_complete(BlockJob *job, void *opaque) @@ -70,7 +69,7 @@ static void stream_complete(BlockJob *job, void *opaque) BlockDriverState *base = s->base; Error *local_err = NULL; - if (!block_job_is_cancelled(&s->common) && data->reached_end && + if (!block_job_is_cancelled(&s->common) && bs->backing && data->ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -211,7 +210,6 @@ out: /* Modify backing chain and close BDSes in main loop */ data = g_malloc(sizeof(*data)); data->ret = ret; - data->reached_end = sector_num == end; block_job_defer_to_main_loop(&s->common, stream_complete, data); } From d535435f4a3968a897803d38bf1642f3b644979a Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:43 -0500 Subject: [PATCH 24/40] stream: Switch stream_run() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the internal loop iteration of streaming to track by bytes instead of sectors (although we are still guaranteed that we iterate by steps that are sector-aligned). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/stream.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/block/stream.c b/block/stream.c index 12f1659942..e3dd2ac3c8 100644 --- a/block/stream.c +++ b/block/stream.c @@ -107,12 +107,11 @@ static void coroutine_fn stream_run(void *opaque) BlockBackend *blk = s->common.blk; BlockDriverState *bs = blk_bs(blk); BlockDriverState *base = s->base; - int64_t sector_num = 0; - int64_t end = -1; + int64_t offset = 0; uint64_t delay_ns = 0; int error = 0; int ret = 0; - int n = 0; + int n = 0; /* sectors */ void *buf; if (!bs->backing) { @@ -125,7 +124,6 @@ static void coroutine_fn stream_run(void *opaque) goto out; } - end = s->common.len >> BDRV_SECTOR_BITS; buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE); /* Turn on copy-on-read for the whole block device so that guest read @@ -137,7 +135,7 @@ static void coroutine_fn stream_run(void *opaque) bdrv_enable_copy_on_read(bs); } - for (sector_num = 0; sector_num < end; sector_num += n) { + for ( ; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) { bool copy; /* Note that even when no rate limit is applied we need to yield @@ -150,28 +148,26 @@ static void coroutine_fn stream_run(void *opaque) copy = false; - ret = bdrv_is_allocated(bs, sector_num, + ret = bdrv_is_allocated(bs, offset / BDRV_SECTOR_SIZE, STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the - * known-unallocated area [sector_num, sector_num+n). */ + * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). 
*/ ret = bdrv_is_allocated_above(backing_bs(bs), base, - sector_num, n, &n); + offset / BDRV_SECTOR_SIZE, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { - n = end - sector_num; + n = (s->common.len - offset) / BDRV_SECTOR_SIZE; } copy = (ret == 1); } - trace_stream_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, - n * BDRV_SECTOR_SIZE, ret); + trace_stream_one_iteration(s, offset, n * BDRV_SECTOR_SIZE, ret); if (copy) { - ret = stream_populate(blk, sector_num * BDRV_SECTOR_SIZE, - n * BDRV_SECTOR_SIZE, buf); + ret = stream_populate(blk, offset, n * BDRV_SECTOR_SIZE, buf); } if (ret < 0) { BlockErrorAction action = From d8a9858408bcc57a3f4934c5082dd4dddebae583 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:44 -0500 Subject: [PATCH 25/40] commit: Switch commit_populate() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Start by converting an internal function (no semantic change). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/commit.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/block/commit.c b/block/commit.c index 4cda7f2425..6f67d78ed9 100644 --- a/block/commit.c +++ b/block/commit.c @@ -47,26 +47,25 @@ typedef struct CommitBlockJob { } CommitBlockJob; static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, - int64_t sector_num, int nb_sectors, + int64_t offset, uint64_t bytes, void *buf) { int ret = 0; QEMUIOVector qiov; struct iovec iov = { .iov_base = buf, - .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + .iov_len = bytes, }; + assert(bytes < SIZE_MAX); qemu_iovec_init_external(&qiov, &iov, 1); - ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE, - qiov.size, &qiov, 0); + ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0); if (ret < 0) { return ret; } - ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE, - qiov.size, &qiov, 0); + ret = blk_co_pwritev(base, offset, qiov.size, &qiov, 0); if (ret < 0) { return ret; } @@ -193,7 +192,9 @@ static void coroutine_fn commit_run(void *opaque) trace_commit_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE, ret); if (copy) { - ret = commit_populate(s->top, s->base, sector_num, n, buf); + ret = commit_populate(s->top, s->base, + sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE, buf); bytes_written += n * BDRV_SECTOR_SIZE; } if (ret < 0) { From 317a6676a279bf71c881992387f75e16e009259c Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:45 -0500 Subject: [PATCH 26/40] commit: Switch commit_run() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the internal loop iteration of committing to track by bytes instead of sectors (although we are still guaranteed that we iterate by steps that are sector-aligned). 
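
The resulting loop shape, condensed from the hunk below: offset advances in bytes, and conversions happen only at the still sector-based bdrv_is_allocated_above() boundary (n remains a sector count).

    for (offset = 0; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) {
        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
                                      offset / BDRV_SECTOR_SIZE,
                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                      &n);
        /* ... copy and account n * BDRV_SECTOR_SIZE bytes as before ... */
    }
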
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/commit.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/block/commit.c b/block/commit.c index 6f67d78ed9..c3a7bca20c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -143,17 +143,16 @@ static void coroutine_fn commit_run(void *opaque) { CommitBlockJob *s = opaque; CommitCompleteData *data; - int64_t sector_num, end; + int64_t offset; uint64_t delay_ns = 0; int ret = 0; - int n = 0; + int n = 0; /* sectors */ void *buf = NULL; int bytes_written = 0; int64_t base_len; ret = s->common.len = blk_getlength(s->top); - if (s->common.len < 0) { goto out; } @@ -170,10 +169,9 @@ static void coroutine_fn commit_run(void *opaque) } } - end = s->common.len >> BDRV_SECTOR_BITS; buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); - for (sector_num = 0; sector_num < end; sector_num += n) { + for (offset = 0; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) { bool copy; /* Note that even when no rate limit is applied we need to yield @@ -185,15 +183,13 @@ static void coroutine_fn commit_run(void *opaque) } /* Copy if allocated above the base */ ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), - sector_num, + offset / BDRV_SECTOR_SIZE, COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); copy = (ret == 1); - trace_commit_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, - n * BDRV_SECTOR_SIZE, ret); + trace_commit_one_iteration(s, offset, n * BDRV_SECTOR_SIZE, ret); if (copy) { - ret = commit_populate(s->top, s->base, - sector_num * BDRV_SECTOR_SIZE, + ret = commit_populate(s->top, s->base, offset, n * BDRV_SECTOR_SIZE, buf); bytes_written += n * BDRV_SECTOR_SIZE; } From b436982f04fb33bb29fcdea190bd1fdc97dc65ef Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:46 -0500 Subject: [PATCH 27/40] mirror: Switch MirrorBlockJob to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Continue by converting an internal structure (no semantic change), and all references to the buffer size. Add an assertion that our use of s->granularity >> BDRV_SECTOR_BITS (necessary for interaction with sector-based dirty bitmaps, until a later patch converts those to be byte-based) does not suffer from truncation problems. [checkpatch has a false positive on use of MIN() in this patch] Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 84 +++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index b4dfe952de..10f4e9b494 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -24,9 +24,8 @@ #define SLICE_TIME 100000000ULL /* ns */ #define MAX_IN_FLIGHT 16 -#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */ -#define DEFAULT_MIRROR_BUF_SIZE \ - (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE) +#define MAX_IO_BYTES (1 << 20) /* 1 Mb */ +#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES) /* The mirroring buffer is a list of granularity-sized chunks. * Free chunks are organized in a list. 
@@ -67,11 +66,11 @@ typedef struct MirrorBlockJob { uint64_t last_pause_ns; unsigned long *in_flight_bitmap; int in_flight; - int64_t sectors_in_flight; + int64_t bytes_in_flight; int ret; bool unmap; bool waiting_for_io; - int target_cluster_sectors; + int target_cluster_size; int max_iov; bool initial_zeroing_ongoing; } MirrorBlockJob; @@ -79,8 +78,8 @@ typedef struct MirrorBlockJob { typedef struct MirrorOp { MirrorBlockJob *s; QEMUIOVector qiov; - int64_t sector_num; - int nb_sectors; + int64_t offset; + uint64_t bytes; } MirrorOp; static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, @@ -101,13 +100,12 @@ static void mirror_iteration_done(MirrorOp *op, int ret) MirrorBlockJob *s = op->s; struct iovec *iov; int64_t chunk_num; - int i, nb_chunks, sectors_per_chunk; + int i, nb_chunks; - trace_mirror_iteration_done(s, op->sector_num * BDRV_SECTOR_SIZE, - op->nb_sectors * BDRV_SECTOR_SIZE, ret); + trace_mirror_iteration_done(s, op->offset, op->bytes, ret); s->in_flight--; - s->sectors_in_flight -= op->nb_sectors; + s->bytes_in_flight -= op->bytes; iov = op->qiov.iov; for (i = 0; i < op->qiov.niov; i++) { MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base; @@ -115,16 +113,15 @@ static void mirror_iteration_done(MirrorOp *op, int ret) s->buf_free_count++; } - sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; - chunk_num = op->sector_num / sectors_per_chunk; - nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk); + chunk_num = op->offset / s->granularity; + nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity); bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); if (ret >= 0) { if (s->cow_bitmap) { bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); } if (!s->initial_zeroing_ongoing) { - s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE; + s->common.offset += op->bytes; } } qemu_iovec_destroy(&op->qiov); @@ -144,7 +141,8 @@ static void mirror_write_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, + op->bytes >> BDRV_SECTOR_BITS); action = mirror_error_action(s, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -163,7 +161,8 @@ static void mirror_read_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, + op->bytes >> BDRV_SECTOR_BITS); action = mirror_error_action(s, true, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -171,7 +170,7 @@ static void mirror_read_complete(void *opaque, int ret) mirror_iteration_done(op, ret); } else { - blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov, + blk_aio_pwritev(s->target, op->offset, &op->qiov, 0, mirror_write_complete, op); } aio_context_release(blk_get_aio_context(s->common.blk)); @@ -211,7 +210,8 @@ static int mirror_cow_align(MirrorBlockJob *s, align_nb_sectors = max_sectors; if (need_cow) { align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors, - s->target_cluster_sectors); + s->target_cluster_size >> + BDRV_SECTOR_BITS); } } /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but @@ -277,8 +277,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* Allocate a MirrorOp that is used as an AIO callback. 
*/ op = g_new(MirrorOp, 1); op->s = s; - op->sector_num = sector_num; - op->nb_sectors = nb_sectors; + op->offset = sector_num * BDRV_SECTOR_SIZE; + op->bytes = nb_sectors * BDRV_SECTOR_SIZE; /* Now make a QEMUIOVector taking enough granularity-sized chunks * from s->buf_free. @@ -295,7 +295,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* Copy the dirty cluster. */ s->in_flight++; - s->sectors_in_flight += nb_sectors; + s->bytes_in_flight += nb_sectors * BDRV_SECTOR_SIZE; trace_mirror_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE); @@ -315,19 +315,17 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, * so the freeing in mirror_iteration_done is nop. */ op = g_new0(MirrorOp, 1); op->s = s; - op->sector_num = sector_num; - op->nb_sectors = nb_sectors; + op->offset = sector_num * BDRV_SECTOR_SIZE; + op->bytes = nb_sectors * BDRV_SECTOR_SIZE; s->in_flight++; - s->sectors_in_flight += nb_sectors; + s->bytes_in_flight += nb_sectors * BDRV_SECTOR_SIZE; if (is_discard) { blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS, - op->nb_sectors << BDRV_SECTOR_BITS, - mirror_write_complete, op); + op->bytes, mirror_write_complete, op); } else { blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE, - op->nb_sectors * BDRV_SECTOR_SIZE, - s->unmap ? BDRV_REQ_MAY_UNMAP : 0, + op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0, mirror_write_complete, op); } } @@ -342,8 +340,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int64_t end = s->bdev_length / BDRV_SECTOR_SIZE; int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); - int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT, - MAX_IO_SECTORS); + int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); bdrv_dirty_bitmap_lock(s->dirty_bitmap); sector_num = bdrv_dirty_iter_next(s->dbi); @@ -415,9 +412,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) nb_chunks * sectors_per_chunk, &io_sectors, &file); if (ret < 0) { - io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors); + io_sectors = MIN(nb_chunks * sectors_per_chunk, + max_io_bytes >> BDRV_SECTOR_BITS); } else if (ret & BDRV_BLOCK_DATA) { - io_sectors = MIN(io_sectors, max_io_sectors); + io_sectors = MIN(io_sectors, max_io_bytes >> BDRV_SECTOR_BITS); } io_sectors -= io_sectors % sectors_per_chunk; @@ -719,7 +717,6 @@ static void coroutine_fn mirror_run(void *opaque) char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ int ret = 0; - int target_cluster_size = BDRV_SECTOR_SIZE; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; @@ -771,14 +768,15 @@ static void coroutine_fn mirror_run(void *opaque) bdrv_get_backing_filename(target_bs, backing_filename, sizeof(backing_filename)); if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) { - target_cluster_size = bdi.cluster_size; + s->target_cluster_size = bdi.cluster_size; + } else { + s->target_cluster_size = BDRV_SECTOR_SIZE; } - if (backing_filename[0] && !target_bs->backing - && s->granularity < target_cluster_size) { - s->buf_size = MAX(s->buf_size, target_cluster_size); + if (backing_filename[0] && !target_bs->backing && + s->granularity < s->target_cluster_size) { + s->buf_size = MAX(s->buf_size, s->target_cluster_size); s->cow_bitmap = bitmap_new(length); } - s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS; s->max_iov = 
MIN(bs->bl.max_iov, target_bs->bl.max_iov); s->buf = qemu_try_blockalign(bs, s->buf_size); @@ -814,10 +812,10 @@ static void coroutine_fn mirror_run(void *opaque) cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* s->common.offset contains the number of bytes already processed so * far, cnt is the number of dirty sectors remaining and - * s->sectors_in_flight is the number of sectors currently being + * s->bytes_in_flight is the number of bytes currently being * processed; together those are the current total operation length */ - s->common.len = s->common.offset + - (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE; + s->common.len = s->common.offset + s->bytes_in_flight + + cnt * BDRV_SECTOR_SIZE; /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that bdrv_drain_all() returns. @@ -1150,6 +1148,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, } assert ((granularity & (granularity - 1)) == 0); + /* Granularity must be large enough for sector-based dirty bitmap */ + assert(granularity >= BDRV_SECTOR_SIZE); if (buf_size < 0) { error_setg(errp, "Invalid parameter 'buf-size'"); From e6f2419389a841260e0323ae72751ab489ec6dcc Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:47 -0500 Subject: [PATCH 28/40] mirror: Switch mirror_do_zero_or_discard() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function (no semantic change). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 10f4e9b494..60eefbd98b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -305,8 +305,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, } static void mirror_do_zero_or_discard(MirrorBlockJob *s, - int64_t sector_num, - int nb_sectors, + int64_t offset, + uint64_t bytes, bool is_discard) { MirrorOp *op; @@ -315,16 +315,16 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, * so the freeing in mirror_iteration_done is nop. */ op = g_new0(MirrorOp, 1); op->s = s; - op->offset = sector_num * BDRV_SECTOR_SIZE; - op->bytes = nb_sectors * BDRV_SECTOR_SIZE; + op->offset = offset; + op->bytes = bytes; s->in_flight++; - s->bytes_in_flight += nb_sectors * BDRV_SECTOR_SIZE; + s->bytes_in_flight += bytes; if (is_discard) { - blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS, + blk_aio_pdiscard(s->target, offset, op->bytes, mirror_write_complete, op); } else { - blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE, + blk_aio_pwrite_zeroes(s->target, offset, op->bytes, s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0, mirror_write_complete, op); } @@ -453,7 +453,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; case MIRROR_METHOD_ZERO: case MIRROR_METHOD_DISCARD: - mirror_do_zero_or_discard(s, sector_num, io_sectors, + mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE, + io_sectors * BDRV_SECTOR_SIZE, mirror_method == MIRROR_METHOD_DISCARD); if (write_zeroes_ok) { io_bytes_acct = 0; @@ -657,7 +658,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_do_zero_or_discard(s, sector_num, nb_sectors, false); + mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, false); sector_num += nb_sectors; } From 931e52607fbecb40543b298d7ee8740d0e4af3ef Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:48 -0500 Subject: [PATCH 29/40] mirror: Update signature of mirror_clip_sectors() Rather than having a void function that modifies its input in-place as the output, change the signature to reduce a layer of indirection and return the result. Suggested-by: John Snow Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 60eefbd98b..a41ef25eee 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -176,12 +176,12 @@ static void mirror_read_complete(void *opaque, int ret) aio_context_release(blk_get_aio_context(s->common.blk)); } -static inline void mirror_clip_sectors(MirrorBlockJob *s, - int64_t sector_num, - int *nb_sectors) +static inline int mirror_clip_sectors(MirrorBlockJob *s, + int64_t sector_num, + int nb_sectors) { - *nb_sectors = MIN(*nb_sectors, - s->bdev_length / BDRV_SECTOR_SIZE - sector_num); + return MIN(nb_sectors, + s->bdev_length / BDRV_SECTOR_SIZE - sector_num); } /* Round sector_num and/or nb_sectors to target cluster if COW is needed, and @@ -216,7 +216,8 @@ static int mirror_cow_align(MirrorBlockJob *s, } /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but * that doesn't matter because it's already the end of source image. */ - mirror_clip_sectors(s, align_sector_num, &align_nb_sectors); + align_nb_sectors = mirror_clip_sectors(s, align_sector_num, + align_nb_sectors); ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors); *sector_num = align_sector_num; @@ -445,7 +446,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) return 0; } - mirror_clip_sectors(s, sector_num, &io_sectors); + io_sectors = mirror_clip_sectors(s, sector_num, io_sectors); switch (mirror_method) { case MIRROR_METHOD_COPY: io_sectors = mirror_do_read(s, sector_num, io_sectors); From 782d97efec66d743f87f28f1d040cdfacc380b1e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:49 -0500 Subject: [PATCH 30/40] mirror: Switch mirror_cow_align() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function (no semantic change), and add mirror_clip_bytes() as a counterpart to mirror_clip_sectors(). Some of the conversion is a bit tricky, requiring temporaries to convert between units; it will be cleared up in a following patch. 
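As a side note for reviewers, the clip-then-align pattern this patch converges on is small enough to model outside QEMU; the sketch below uses made-up image and cluster sizes, and clip_bytes()/the ALIGN macros only approximate mirror_clip_bytes() and bdrv_round_to_clusters():

    /* clip_align.c - build with: cc -std=c99 -o clip_align clip_align.c */
    #include <inttypes.h>
    #include <stdio.h>

    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
    #define ALIGN_UP(n, m)   ALIGN_DOWN((n) + (m) - 1, (m))
    #define MIN(a, b)        ((a) < (b) ? (a) : (b))

    /* Keep [offset, offset + bytes) inside a device of bdev_length bytes,
     * which is the job the new mirror_clip_bytes() helper does. */
    static int64_t clip_bytes(int64_t bdev_length, int64_t offset, int64_t bytes)
    {
        return MIN(bytes, bdev_length - offset);
    }

    int main(void)
    {
        int64_t bdev_length = 1000000;  /* made-up image length, in bytes */
        int64_t cluster = 65536;        /* made-up target cluster size */
        int64_t offset = 130000, bytes = 900000;

        /* Widen the request to cluster boundaries when COW is needed... */
        int64_t align_offset = ALIGN_DOWN(offset, cluster);
        int64_t align_bytes = ALIGN_UP(offset + bytes, cluster) - align_offset;

        /* ...then clip the tail so the widened request never passes EOF. */
        align_bytes = clip_bytes(bdev_length, align_offset, align_bytes);

        printf("aligned: %" PRId64 " + %" PRId64 "\n", align_offset, align_bytes);
        return 0;
    }

With these numbers the clipped tail is no longer cluster-aligned, which is exactly the end-of-image case the in-code comment about chunk boundaries refers to.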
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 63 +++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index a41ef25eee..0378bd2977 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -176,6 +176,15 @@ static void mirror_read_complete(void *opaque, int ret) aio_context_release(blk_get_aio_context(s->common.blk)); } +/* Clip bytes relative to offset to not exceed end-of-file */ +static inline int64_t mirror_clip_bytes(MirrorBlockJob *s, + int64_t offset, + int64_t bytes) +{ + return MIN(bytes, s->bdev_length - offset); +} + +/* Clip nb_sectors relative to sector_num to not exceed end-of-file */ static inline int mirror_clip_sectors(MirrorBlockJob *s, int64_t sector_num, int nb_sectors) @@ -184,44 +193,38 @@ static inline int mirror_clip_sectors(MirrorBlockJob *s, s->bdev_length / BDRV_SECTOR_SIZE - sector_num); } -/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and - * return the offset of the adjusted tail sector against original. */ -static int mirror_cow_align(MirrorBlockJob *s, - int64_t *sector_num, - int *nb_sectors) +/* Round offset and/or bytes to target cluster if COW is needed, and + * return the offset of the adjusted tail against original. */ +static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + unsigned int *bytes) { bool need_cow; int ret = 0; - int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS; - int64_t align_sector_num = *sector_num; - int align_nb_sectors = *nb_sectors; - int max_sectors = chunk_sectors * s->max_iov; + int64_t align_offset = *offset; + unsigned int align_bytes = *bytes; + int max_bytes = s->granularity * s->max_iov; - need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap); - need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, + need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); + need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, s->cow_bitmap); if (need_cow) { - bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num, - *nb_sectors, &align_sector_num, - &align_nb_sectors); + bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes, + &align_offset, &align_bytes); } - if (align_nb_sectors > max_sectors) { - align_nb_sectors = max_sectors; + if (align_bytes > max_bytes) { + align_bytes = max_bytes; if (need_cow) { - align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors, - s->target_cluster_size >> - BDRV_SECTOR_BITS); + align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size); } } - /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but + /* Clipping may result in align_bytes unaligned to chunk boundary, but * that doesn't matter because it's already the end of source image. 
*/ - align_nb_sectors = mirror_clip_sectors(s, align_sector_num, - align_nb_sectors); + align_bytes = mirror_clip_bytes(s, align_offset, align_bytes); - ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors); - *sector_num = align_sector_num; - *nb_sectors = align_nb_sectors; + ret = align_offset + align_bytes - (*offset + *bytes); + *offset = align_offset; + *bytes = align_bytes; assert(ret >= 0); return ret; } @@ -257,10 +260,18 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); nb_sectors = MIN(max_sectors, nb_sectors); assert(nb_sectors); + assert(nb_sectors < BDRV_REQUEST_MAX_SECTORS); ret = nb_sectors; if (s->cow_bitmap) { - ret += mirror_cow_align(s, §or_num, &nb_sectors); + int64_t offset = sector_num * BDRV_SECTOR_SIZE; + unsigned int bytes = nb_sectors * BDRV_SECTOR_SIZE; + int gap; + + gap = mirror_cow_align(s, &offset, &bytes); + sector_num = offset / BDRV_SECTOR_SIZE; + nb_sectors = bytes / BDRV_SECTOR_SIZE; + ret += gap / BDRV_SECTOR_SIZE; } assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size); /* The sector range must meet granularity because: From ae4cc8777b456ab87ca1f02b98b006ec0c96335e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:50 -0500 Subject: [PATCH 31/40] mirror: Switch mirror_do_read() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function, preserving all existing semantics, and adding one more assertion that things are still sector-aligned (so that conversions to sectors in mirror_read_complete don't need to round). Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/mirror.c | 74 ++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 0378bd2977..262fddf2ce 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -196,7 +196,7 @@ static inline int mirror_clip_sectors(MirrorBlockJob *s, /* Round offset and/or bytes to target cluster if COW is needed, and * return the offset of the adjusted tail against original. */ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - unsigned int *bytes) + uint64_t *bytes) { bool need_cow; int ret = 0; @@ -204,6 +204,7 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, unsigned int align_bytes = *bytes; int max_bytes = s->granularity * s->max_iov; + assert(*bytes < INT_MAX); need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap); need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, s->cow_bitmap); @@ -238,59 +239,51 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) } /* Submit async read while handling COW. - * Returns: The number of sectors copied after and including sector_num, - * excluding any sectors copied prior to sector_num due to alignment. - * This will be nb_sectors if no alignment is necessary, or - * (new_end - sector_num) if tail is rounded up or down due to + * Returns: The number of bytes copied after and including offset, + * excluding any bytes copied prior to offset due to alignment. + * This will be @bytes if no alignment is necessary, or + * (new_end - offset) if tail is rounded up or down due to * alignment or buffer limit. 
*/ -static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, - int nb_sectors) +static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset, + uint64_t bytes) { BlockBackend *source = s->common.blk; - int sectors_per_chunk, nb_chunks; - int ret; + int nb_chunks; + uint64_t ret; MirrorOp *op; - int max_sectors; + uint64_t max_bytes; - sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; - max_sectors = sectors_per_chunk * s->max_iov; + max_bytes = s->granularity * s->max_iov; /* We can only handle as much as buf_size at a time. */ - nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); - nb_sectors = MIN(max_sectors, nb_sectors); - assert(nb_sectors); - assert(nb_sectors < BDRV_REQUEST_MAX_SECTORS); - ret = nb_sectors; + bytes = MIN(s->buf_size, MIN(max_bytes, bytes)); + assert(bytes); + assert(bytes < BDRV_REQUEST_MAX_BYTES); + ret = bytes; if (s->cow_bitmap) { - int64_t offset = sector_num * BDRV_SECTOR_SIZE; - unsigned int bytes = nb_sectors * BDRV_SECTOR_SIZE; - int gap; - - gap = mirror_cow_align(s, &offset, &bytes); - sector_num = offset / BDRV_SECTOR_SIZE; - nb_sectors = bytes / BDRV_SECTOR_SIZE; - ret += gap / BDRV_SECTOR_SIZE; + ret += mirror_cow_align(s, &offset, &bytes); } - assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size); - /* The sector range must meet granularity because: + assert(bytes <= s->buf_size); + /* The offset is granularity-aligned because: * 1) Caller passes in aligned values; * 2) mirror_cow_align is used only when target cluster is larger. */ - assert(!(sector_num % sectors_per_chunk)); - nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk); + assert(QEMU_IS_ALIGNED(offset, s->granularity)); + /* The range is sector-aligned, since bdrv_getlength() rounds up. */ + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + nb_chunks = DIV_ROUND_UP(bytes, s->granularity); while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, - s->in_flight); + trace_mirror_yield_in_flight(s, offset, s->in_flight); mirror_wait_for_io(s); } /* Allocate a MirrorOp that is used as an AIO callback. */ op = g_new(MirrorOp, 1); op->s = s; - op->offset = sector_num * BDRV_SECTOR_SIZE; - op->bytes = nb_sectors * BDRV_SECTOR_SIZE; + op->offset = offset; + op->bytes = bytes; /* Now make a QEMUIOVector taking enough granularity-sized chunks * from s->buf_free. @@ -298,7 +291,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, qemu_iovec_init(&op->qiov, nb_chunks); while (nb_chunks-- > 0) { MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); - size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size; + size_t remaining = bytes - op->qiov.size; QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); s->buf_free_count--; @@ -307,12 +300,10 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* Copy the dirty cluster. 
*/ s->in_flight++; - s->bytes_in_flight += nb_sectors * BDRV_SECTOR_SIZE; - trace_mirror_one_iteration(s, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); + s->bytes_in_flight += bytes; + trace_mirror_one_iteration(s, offset, bytes); - blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0, - mirror_read_complete, op); + blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op); return ret; } @@ -460,8 +451,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) io_sectors = mirror_clip_sectors(s, sector_num, io_sectors); switch (mirror_method) { case MIRROR_METHOD_COPY: - io_sectors = mirror_do_read(s, sector_num, io_sectors); - io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; + io_bytes_acct = mirror_do_read(s, sector_num * BDRV_SECTOR_SIZE, + io_sectors * BDRV_SECTOR_SIZE); + io_sectors = io_bytes_acct / BDRV_SECTOR_SIZE; break; case MIRROR_METHOD_ZERO: case MIRROR_METHOD_DISCARD: From fb2ef7919bd7b125a2ff6cb70689a7ab93b9d05a Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:51 -0500 Subject: [PATCH 32/40] mirror: Switch mirror_iteration() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the internal loop iteration of mirroring to track by bytes instead of sectors (although we are still guaranteed that we iterate by steps that are both sector-aligned and multiples of the granularity). Drop the now-unused mirror_clip_sectors(). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 105 ++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 59 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 262fddf2ce..b33f4bb1d6 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -184,15 +184,6 @@ static inline int64_t mirror_clip_bytes(MirrorBlockJob *s, return MIN(bytes, s->bdev_length - offset); } -/* Clip nb_sectors relative to sector_num to not exceed end-of-file */ -static inline int mirror_clip_sectors(MirrorBlockJob *s, - int64_t sector_num, - int nb_sectors) -{ - return MIN(nb_sectors, - s->bdev_length / BDRV_SECTOR_SIZE - sector_num); -} - /* Round offset and/or bytes to target cluster if COW is needed, and * return the offset of the adjusted tail against original. */ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, @@ -336,30 +327,28 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->source; - int64_t sector_num, first_chunk; + int64_t offset, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. 
*/ int nb_chunks = 1; - int64_t end = s->bdev_length / BDRV_SECTOR_SIZE; int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); bdrv_dirty_bitmap_lock(s->dirty_bitmap); - sector_num = bdrv_dirty_iter_next(s->dbi); - if (sector_num < 0) { + offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); - sector_num = bdrv_dirty_iter_next(s->dbi); + offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * BDRV_SECTOR_SIZE); - assert(sector_num >= 0); + assert(offset >= 0); } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); - first_chunk = sector_num / sectors_per_chunk; + first_chunk = offset / s->granularity; while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, - s->in_flight); + trace_mirror_yield_in_flight(s, offset, s->in_flight); mirror_wait_for_io(s); } @@ -368,25 +357,26 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ bdrv_dirty_bitmap_lock(s->dirty_bitmap); - while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { + while (nb_chunks * s->granularity < s->buf_size) { int64_t next_dirty; - int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; - int64_t next_chunk = next_sector / sectors_per_chunk; - if (next_sector >= end || - !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) { + int64_t next_offset = offset + nb_chunks * s->granularity; + int64_t next_chunk = next_offset / s->granularity; + if (next_offset >= s->bdev_length || + !bdrv_get_dirty_locked(source, s->dirty_bitmap, + next_offset >> BDRV_SECTOR_BITS)) { break; } if (test_bit(next_chunk, s->in_flight_bitmap)) { break; } - next_dirty = bdrv_dirty_iter_next(s->dbi); - if (next_dirty > next_sector || next_dirty < 0) { + next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(s->dbi, next_sector); - next_dirty = bdrv_dirty_iter_next(s->dbi); + bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS); + next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; } - assert(next_dirty == next_sector); + assert(next_dirty == next_offset); nb_chunks++; } @@ -394,14 +384,15 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) * calling bdrv_get_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. 
*/ - bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num, - nb_chunks * sectors_per_chunk); + bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS, + nb_chunks * sectors_per_chunk); bdrv_dirty_bitmap_unlock(s->dirty_bitmap); - bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); - while (nb_chunks > 0 && sector_num < end) { + bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); + while (nb_chunks > 0 && offset < s->bdev_length) { int64_t ret; int io_sectors; + unsigned int io_bytes; int64_t io_bytes_acct; BlockDriverState *file; enum MirrorMethod { @@ -410,28 +401,28 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) MIRROR_METHOD_DISCARD } mirror_method = MIRROR_METHOD_COPY; - assert(!(sector_num % sectors_per_chunk)); - ret = bdrv_get_block_status_above(source, NULL, sector_num, + assert(!(offset % s->granularity)); + ret = bdrv_get_block_status_above(source, NULL, + offset >> BDRV_SECTOR_BITS, nb_chunks * sectors_per_chunk, &io_sectors, &file); + io_bytes = io_sectors * BDRV_SECTOR_SIZE; if (ret < 0) { - io_sectors = MIN(nb_chunks * sectors_per_chunk, - max_io_bytes >> BDRV_SECTOR_BITS); + io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes); } else if (ret & BDRV_BLOCK_DATA) { - io_sectors = MIN(io_sectors, max_io_bytes >> BDRV_SECTOR_BITS); + io_bytes = MIN(io_bytes, max_io_bytes); } - io_sectors -= io_sectors % sectors_per_chunk; - if (io_sectors < sectors_per_chunk) { - io_sectors = sectors_per_chunk; + io_bytes -= io_bytes % s->granularity; + if (io_bytes < s->granularity) { + io_bytes = s->granularity; } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { - int64_t target_sector_num; - int target_nb_sectors; - bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num, - io_sectors, &target_sector_num, - &target_nb_sectors); - if (target_sector_num == sector_num && - target_nb_sectors == io_sectors) { + int64_t target_offset; + unsigned int target_bytes; + bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, + &target_offset, &target_bytes); + if (target_offset == offset && + target_bytes == io_bytes) { mirror_method = ret & BDRV_BLOCK_ZERO ? 
MIRROR_METHOD_ZERO : MIRROR_METHOD_DISCARD; @@ -439,8 +430,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, sector_num * BDRV_SECTOR_SIZE, - s->in_flight); + trace_mirror_yield_in_flight(s, offset, s->in_flight); mirror_wait_for_io(s); } @@ -448,30 +438,27 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) return 0; } - io_sectors = mirror_clip_sectors(s, sector_num, io_sectors); + io_bytes = mirror_clip_bytes(s, offset, io_bytes); switch (mirror_method) { case MIRROR_METHOD_COPY: - io_bytes_acct = mirror_do_read(s, sector_num * BDRV_SECTOR_SIZE, - io_sectors * BDRV_SECTOR_SIZE); - io_sectors = io_bytes_acct / BDRV_SECTOR_SIZE; + io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes); break; case MIRROR_METHOD_ZERO: case MIRROR_METHOD_DISCARD: - mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE, - io_sectors * BDRV_SECTOR_SIZE, + mirror_do_zero_or_discard(s, offset, io_bytes, mirror_method == MIRROR_METHOD_DISCARD); if (write_zeroes_ok) { io_bytes_acct = 0; } else { - io_bytes_acct = io_sectors * BDRV_SECTOR_SIZE; + io_bytes_acct = io_bytes; } break; default: abort(); } - assert(io_sectors); - sector_num += io_sectors; - nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk); + assert(io_bytes); + offset += io_bytes; + nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); if (s->common.speed) { delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct); } From e8a81e9cadd0ec0e2cf3f564ae73809b0d8780da Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:52 -0500 Subject: [PATCH 33/40] block: Drop unused bdrv_round_sectors_to_clusters() Now that the last user [mirror_iteration()] has converted to using bytes, we no longer need a function to round sectors to clusters. 
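To see why nothing is lost by the removal, the sector-based helper was effectively a scaled wrapper around the byte-based rounding that stays; here is a standalone sketch of that equivalence (BDRV_SECTOR_SIZE hard-coded as 512, cluster lookup and the zero-cluster fallback omitted):

    /* round_equiv.c - build with: cc -std=c99 -o round_equiv round_equiv.c */
    #include <inttypes.h>
    #include <stdio.h>

    #define SECTOR_SIZE 512

    /* Simplified stand-in for the byte-based bdrv_round_to_clusters(). */
    static void round_to_clusters(int64_t cluster, int64_t offset, int64_t bytes,
                                  int64_t *c_offset, int64_t *c_bytes)
    {
        *c_offset = offset / cluster * cluster;
        *c_bytes = (offset + bytes + cluster - 1) / cluster * cluster - *c_offset;
    }

    /* What the removed sector-based variant amounted to: scale in, scale out. */
    static void round_sectors_to_clusters(int64_t cluster, int64_t sector_num,
                                          int64_t nb_sectors, int64_t *c_sector,
                                          int64_t *c_nb_sectors)
    {
        int64_t c_offset, c_bytes;

        round_to_clusters(cluster, sector_num * SECTOR_SIZE,
                          nb_sectors * SECTOR_SIZE, &c_offset, &c_bytes);
        *c_sector = c_offset / SECTOR_SIZE;
        *c_nb_sectors = c_bytes / SECTOR_SIZE;
    }

    int main(void)
    {
        int64_t c_sector, c_nb_sectors;

        round_sectors_to_clusters(65536, 9, 3, &c_sector, &c_nb_sectors);
        printf("%" PRId64 " + %" PRId64 " sectors\n", c_sector, c_nb_sectors);
        return 0;
    }

Once every caller passes bytes, the outer scaling layer has no users, which is all this patch deletes.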
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/io.c | 21 --------------------- include/block/block.h | 4 ---- 2 files changed, 25 deletions(-) diff --git a/block/io.c b/block/io.c index 5c146b5a10..a0a36dfaa8 100644 --- a/block/io.c +++ b/block/io.c @@ -418,27 +418,6 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); } -/** - * Round a region to cluster boundaries (sector-based) - */ -void bdrv_round_sectors_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) -{ - BlockDriverInfo bdi; - - if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { - *cluster_sector_num = sector_num; - *cluster_nb_sectors = nb_sectors; - } else { - int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; - *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); - *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + - nb_sectors, c); - } -} - /** * Round a region to cluster boundaries */ diff --git a/include/block/block.h b/include/block/block.h index afe1b61524..121f42becf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -475,10 +475,6 @@ const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); int bdrv_get_flags(BlockDriverState *bs); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); -void bdrv_round_sectors_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors); void bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, unsigned int bytes, int64_t *cluster_offset, From cf79cdf662aad365b53fc955f45fd47d9883c8df Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:53 -0500 Subject: [PATCH 34/40] backup: Switch BackupBlockJob to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Continue by converting an internal structure (no semantic change), and all references to tracking progress. Drop a redundant local variable bytes_per_cluster. 
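The accounting side of the change is easy to model: progress and the rate limiter now consume one byte counter instead of a sector counter that gets scaled at every use. The throttle arithmetic below is invented for the sketch and is not QEMU's ratelimit code:

    /* progress.c - build with: cc -std=c99 -o progress progress.c */
    #include <inttypes.h>
    #include <stdio.h>

    typedef struct {
        uint64_t bytes_read;  /* bytes copied since the last throttle check */
        int64_t offset;       /* opaque progress value, also in bytes now */
        int64_t speed;        /* throttle in bytes per second, 0 = unlimited */
    } Job;

    /* Toy stand-in for ratelimit_calculate_delay(): nanoseconds to sleep so
     * that the copy rate stays under job->speed bytes per second. */
    static uint64_t delay_ns(Job *job)
    {
        uint64_t ns = job->speed
            ? job->bytes_read * 1000000000ull / job->speed : 0;
        job->bytes_read = 0;
        return ns;
    }

    int main(void)
    {
        Job job = { .speed = 10 * 1024 * 1024 };  /* made-up 10 MiB/s cap */
        uint64_t n = 65536;                       /* one cluster copied */

        job.bytes_read += n;  /* previously: sectors_read += n_sectors */
        job.offset += n;      /* previously: offset += n_sectors * 512 */
        printf("sleep %" PRIu64 " ns\n", delay_ns(&job));
        return 0;
    }

Tracking both values in bytes removes the per-use BDRV_SECTOR_SIZE multiplications that the old sectors_read field required.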
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/block/backup.c b/block/backup.c index 06431ac79e..4e64710b39 100644 --- a/block/backup.c +++ b/block/backup.c @@ -39,7 +39,7 @@ typedef struct BackupBlockJob { BlockdevOnError on_source_error; BlockdevOnError on_target_error; CoRwlock flush_rwlock; - uint64_t sectors_read; + uint64_t bytes_read; unsigned long *done_bitmap; int64_t cluster_size; bool compress; @@ -102,16 +102,15 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, void *bounce_buffer = NULL; int ret = 0; int64_t sectors_per_cluster = cluster_size_sectors(job); - int64_t bytes_per_cluster = sectors_per_cluster * BDRV_SECTOR_SIZE; - int64_t start, end; - int n; + int64_t start, end; /* clusters */ + int n; /* bytes */ qemu_co_rwlock_rdlock(&job->flush_rwlock); start = sector_num / sectors_per_cluster; end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); - trace_backup_do_cow_enter(job, start * bytes_per_cluster, + trace_backup_do_cow_enter(job, start * job->cluster_size, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE); @@ -120,28 +119,27 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, for (; start < end; start++) { if (test_bit(start, job->done_bitmap)) { - trace_backup_do_cow_skip(job, start * bytes_per_cluster); + trace_backup_do_cow_skip(job, start * job->cluster_size); continue; /* already copied */ } - trace_backup_do_cow_process(job, start * bytes_per_cluster); + trace_backup_do_cow_process(job, start * job->cluster_size); - n = MIN(sectors_per_cluster, - job->common.len / BDRV_SECTOR_SIZE - - start * sectors_per_cluster); + n = MIN(job->cluster_size, + job->common.len - start * job->cluster_size); if (!bounce_buffer) { bounce_buffer = blk_blockalign(blk, job->cluster_size); } iov.iov_base = bounce_buffer; - iov.iov_len = n * BDRV_SECTOR_SIZE; + iov.iov_len = n; qemu_iovec_init_external(&bounce_qiov, &iov, 1); ret = blk_co_preadv(blk, start * job->cluster_size, bounce_qiov.size, &bounce_qiov, is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); if (ret < 0) { - trace_backup_do_cow_read_fail(job, start * bytes_per_cluster, ret); + trace_backup_do_cow_read_fail(job, start * job->cluster_size, ret); if (error_is_read) { *error_is_read = true; } @@ -157,7 +155,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0); } if (ret < 0) { - trace_backup_do_cow_write_fail(job, start * bytes_per_cluster, ret); + trace_backup_do_cow_write_fail(job, start * job->cluster_size, ret); if (error_is_read) { *error_is_read = false; } @@ -169,8 +167,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, /* Publish progress, guest I/O counts as progress too. Note that the * offset field is an opaque progress value, it is not a disk offset. 
*/ - job->sectors_read += n; - job->common.offset += n * BDRV_SECTOR_SIZE; + job->bytes_read += n; + job->common.offset += n; } out: @@ -363,9 +361,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job) */ if (job->common.speed) { uint64_t delay_ns = ratelimit_calculate_delay(&job->limit, - job->sectors_read * - BDRV_SECTOR_SIZE); - job->sectors_read = 0; + job->bytes_read); + job->bytes_read = 0; block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns); } else { block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0); From f6ac207893a661a6c32819c43a2ab2789b40d12f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:54 -0500 Subject: [PATCH 35/40] backup: Switch block_backup.h to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Continue by converting the public interface to backup jobs (no semantic change), including a change to CowRequest to track by bytes instead of cluster indices. Note that this does not change the difference between the public interface (starting point, and size of the subsequent range) and the internal interface (starting and end points). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Xie Changlong Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 31 +++++++++++++++---------------- block/replication.c | 12 ++++++++---- include/block/block_backup.h | 11 +++++------ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/block/backup.c b/block/backup.c index 4e64710b39..503fbec8c1 100644 --- a/block/backup.c +++ b/block/backup.c @@ -64,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, do { retry = false; QLIST_FOREACH(req, &job->inflight_reqs, list) { - if (end > req->start && start < req->end) { + if (end > req->start_byte && start < req->end_byte) { qemu_co_queue_wait(&req->wait_queue, NULL); retry = true; break; @@ -75,10 +75,10 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, /* Keep track of an in-flight request */ static void cow_request_begin(CowRequest *req, BackupBlockJob *job, - int64_t start, int64_t end) + int64_t start, int64_t end) { - req->start = start; - req->end = end; + req->start_byte = start; + req->end_byte = end; qemu_co_queue_init(&req->wait_queue); QLIST_INSERT_HEAD(&job->inflight_reqs, req, list); } @@ -114,8 +114,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE); - wait_for_overlapping_requests(job, start, end); - cow_request_begin(&cow_request, job, start, end); + wait_for_overlapping_requests(job, start * job->cluster_size, + end * job->cluster_size); + cow_request_begin(&cow_request, job, start * job->cluster_size, + end * job->cluster_size); for (; start < end; start++) { if (test_bit(start, job->done_bitmap)) { @@ -277,32 +279,29 @@ void backup_do_checkpoint(BlockJob *job, Error **errp) bitmap_zero(backup_job->done_bitmap, len); } -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, - int nb_sectors) +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, + uint64_t bytes) { BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); - int64_t sectors_per_cluster = cluster_size_sectors(backup_job); int64_t start, end; assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); - start = sector_num / sectors_per_cluster; - end = DIV_ROUND_UP(sector_num + nb_sectors, 
sectors_per_cluster); + start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size); + end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size); wait_for_overlapping_requests(backup_job, start, end); } void backup_cow_request_begin(CowRequest *req, BlockJob *job, - int64_t sector_num, - int nb_sectors) + int64_t offset, uint64_t bytes) { BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); - int64_t sectors_per_cluster = cluster_size_sectors(backup_job); int64_t start, end; assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); - start = sector_num / sectors_per_cluster; - end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); + start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size); + end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size); cow_request_begin(req, backup_job, start, end); } diff --git a/block/replication.c b/block/replication.c index 3885f04c31..8f3aba7a20 100644 --- a/block/replication.c +++ b/block/replication.c @@ -234,10 +234,14 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, } if (job) { - backup_wait_for_overlapping_requests(child->bs->job, sector_num, - remaining_sectors); - backup_cow_request_begin(&req, child->bs->job, sector_num, - remaining_sectors); + uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE; + + backup_wait_for_overlapping_requests(child->bs->job, + sector_num * BDRV_SECTOR_SIZE, + remaining_bytes); + backup_cow_request_begin(&req, child->bs->job, + sector_num * BDRV_SECTOR_SIZE, + remaining_bytes); ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); backup_cow_request_end(&req); diff --git a/include/block/block_backup.h b/include/block/block_backup.h index 8a759477a3..994a3bd2ec 100644 --- a/include/block/block_backup.h +++ b/include/block/block_backup.h @@ -21,17 +21,16 @@ #include "block/block_int.h" typedef struct CowRequest { - int64_t start; - int64_t end; + int64_t start_byte; + int64_t end_byte; QLIST_ENTRY(CowRequest) list; CoQueue wait_queue; /* coroutines blocked on this request */ } CowRequest; -void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, - int nb_sectors); +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset, + uint64_t bytes); void backup_cow_request_begin(CowRequest *req, BlockJob *job, - int64_t sector_num, - int nb_sectors); + int64_t offset, uint64_t bytes); void backup_cow_request_end(CowRequest *req); void backup_do_checkpoint(BlockJob *job, Error **errp); From 03f5d60bbf264ea68e50db2a7b2383a8da5122d2 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:55 -0500 Subject: [PATCH 36/40] backup: Switch backup_do_cow() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Convert another internal function (no semantic change). 
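The core of the conversion is the loop shape: the byte offset is aligned out to whole clusters and advanced by cluster_size, and a cluster index only survives inside the done-bitmap lookup. A stripped-down sketch with invented sizes (the real function also handles the bounce buffer, the actual I/O, and error paths):

    /* cow_loop.c - build with: cc -std=c99 -o cow_loop cow_loop.c */
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
    #define ALIGN_UP(n, m)   ALIGN_DOWN((n) + (m) - 1, (m))

    int main(void)
    {
        int64_t cluster_size = 65536;           /* made-up cluster size */
        int64_t len = 4 * cluster_size;         /* made-up image length */
        bool done[4] = { false, true, false, false };  /* toy done_bitmap */
        int64_t offset = 70000, bytes = 100000;        /* incoming request */

        /* Byte-based bounds, widened to whole clusters. */
        int64_t start = ALIGN_DOWN(offset, cluster_size);
        int64_t end = ALIGN_UP(offset + bytes, cluster_size);

        for (; start < end; start += cluster_size) {
            if (done[start / cluster_size]) {
                continue;                       /* cluster already copied */
            }
            int64_t n = len - start < cluster_size ? len - start : cluster_size;
            printf("copy %" PRId64 " bytes at %" PRId64 "\n", n, start);
            done[start / cluster_size] = true;
        }
        return 0;
    }

The start and end variables stay in bytes throughout; only the division by cluster_size at the bitmap access reintroduces a cluster index.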
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 62 +++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/block/backup.c b/block/backup.c index 503fbec8c1..f7e14dbe73 100644 --- a/block/backup.c +++ b/block/backup.c @@ -91,7 +91,7 @@ static void cow_request_end(CowRequest *req) } static int coroutine_fn backup_do_cow(BackupBlockJob *job, - int64_t sector_num, int nb_sectors, + int64_t offset, uint64_t bytes, bool *error_is_read, bool is_write_notifier) { @@ -101,34 +101,28 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, QEMUIOVector bounce_qiov; void *bounce_buffer = NULL; int ret = 0; - int64_t sectors_per_cluster = cluster_size_sectors(job); - int64_t start, end; /* clusters */ + int64_t start, end; /* bytes */ int n; /* bytes */ qemu_co_rwlock_rdlock(&job->flush_rwlock); - start = sector_num / sectors_per_cluster; - end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); + start = QEMU_ALIGN_DOWN(offset, job->cluster_size); + end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size); - trace_backup_do_cow_enter(job, start * job->cluster_size, - sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); + trace_backup_do_cow_enter(job, start, offset, bytes); - wait_for_overlapping_requests(job, start * job->cluster_size, - end * job->cluster_size); - cow_request_begin(&cow_request, job, start * job->cluster_size, - end * job->cluster_size); + wait_for_overlapping_requests(job, start, end); + cow_request_begin(&cow_request, job, start, end); - for (; start < end; start++) { - if (test_bit(start, job->done_bitmap)) { - trace_backup_do_cow_skip(job, start * job->cluster_size); + for (; start < end; start += job->cluster_size) { + if (test_bit(start / job->cluster_size, job->done_bitmap)) { + trace_backup_do_cow_skip(job, start); continue; /* already copied */ } - trace_backup_do_cow_process(job, start * job->cluster_size); + trace_backup_do_cow_process(job, start); - n = MIN(job->cluster_size, - job->common.len - start * job->cluster_size); + n = MIN(job->cluster_size, job->common.len - start); if (!bounce_buffer) { bounce_buffer = blk_blockalign(blk, job->cluster_size); @@ -137,11 +131,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, iov.iov_len = n; qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = blk_co_preadv(blk, start * job->cluster_size, - bounce_qiov.size, &bounce_qiov, + ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov, is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); if (ret < 0) { - trace_backup_do_cow_read_fail(job, start * job->cluster_size, ret); + trace_backup_do_cow_read_fail(job, start, ret); if (error_is_read) { *error_is_read = true; } @@ -149,22 +142,22 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, } if (buffer_is_zero(iov.iov_base, iov.iov_len)) { - ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size, + ret = blk_co_pwrite_zeroes(job->target, start, bounce_qiov.size, BDRV_REQ_MAY_UNMAP); } else { - ret = blk_co_pwritev(job->target, start * job->cluster_size, + ret = blk_co_pwritev(job->target, start, bounce_qiov.size, &bounce_qiov, job->compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0); } if (ret < 0) { - trace_backup_do_cow_write_fail(job, start * job->cluster_size, ret); + trace_backup_do_cow_write_fail(job, start, ret); if (error_is_read) { *error_is_read = false; } goto out; } - set_bit(start, job->done_bitmap); + set_bit(start / job->cluster_size, job->done_bitmap); /* Publish progress, guest I/O counts as progress too. Note that the * offset field is an opaque progress value, it is not a disk offset. @@ -180,8 +173,7 @@ out: cow_request_end(&cow_request); - trace_backup_do_cow_return(job, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, ret); + trace_backup_do_cow_return(job, offset, bytes, ret); qemu_co_rwlock_unlock(&job->flush_rwlock); @@ -194,14 +186,12 @@ static int coroutine_fn backup_before_write_notify( { BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write); BdrvTrackedRequest *req = opaque; - int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; - int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; assert(req->bs == blk_bs(job->common.blk)); - assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE)); - return backup_do_cow(job, sector_num, nb_sectors, NULL, true); + return backup_do_cow(job, req->offset, req->bytes, NULL, true); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -406,8 +396,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if (yield_and_check(job)) { goto out; } - ret = backup_do_cow(job, cluster * sectors_per_cluster, - sectors_per_cluster, &error_is_read, + ret = backup_do_cow(job, cluster * job->cluster_size, + job->cluster_size, &error_is_read, false); if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == @@ -509,8 +499,8 @@ static void coroutine_fn backup_run(void *opaque) if (alloced < 0) { ret = alloced; } else { - ret = backup_do_cow(job, start * sectors_per_cluster, - sectors_per_cluster, &error_is_read, + ret = backup_do_cow(job, start * job->cluster_size, + job->cluster_size, &error_is_read, false); } if (ret < 0) { From 6f8e35e2414433a56b4bd548b87b8ac2aedecb77 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:56 -0500 Subject: [PATCH 37/40] backup: Switch backup_run() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the internal loop iteration of backups to track by bytes instead of sectors (although we are still guaranteed that we iterate by steps that are cluster-aligned). 
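One part that changes shape rather than just units is the retry-on-error path: with a cluster index the retry was a plain start--, while with a byte offset the loop has to step back by a whole cluster before continuing. A toy version of that control flow (the injected failure is obviously made up):

    /* retry.c - build with: cc -std=c99 -o retry retry.c */
    #include <inttypes.h>
    #include <stdio.h>

    /* Pretend copy that fails exactly once, at the second cluster. */
    static int do_cow(int64_t offset)
    {
        static int failed_once;

        if (offset == 65536 && !failed_once) {
            failed_once = 1;
            return -1;
        }
        printf("copied cluster at %" PRId64 "\n", offset);
        return 0;
    }

    int main(void)
    {
        int64_t cluster_size = 65536;           /* made-up cluster size */
        int64_t len = 3 * cluster_size;

        for (int64_t offset = 0; offset < len; offset += cluster_size) {
            if (do_cow(offset) < 0) {
                offset -= cluster_size;         /* was: start-- in clusters */
                continue;                       /* retry the same cluster */
            }
        }
        return 0;
    }

The same cluster is attempted again on the next iteration, matching the old behaviour one-for-one, just expressed in bytes.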
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/block/backup.c b/block/backup.c index f7e14dbe73..2bd1d94231 100644 --- a/block/backup.c +++ b/block/backup.c @@ -370,11 +370,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int ret = 0; int clusters_per_iter; uint32_t granularity; - int64_t sector; + int64_t offset; int64_t cluster; int64_t end; int64_t last_cluster = -1; - int64_t sectors_per_cluster = cluster_size_sectors(job); BdrvDirtyBitmapIter *dbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); @@ -382,8 +381,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); /* Find the next dirty sector(s) */ - while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { - cluster = sector / sectors_per_cluster; + while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) { + cluster = offset / job->cluster_size; /* Fake progress updates for any clusters we skipped */ if (cluster != last_cluster + 1) { @@ -410,7 +409,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. */ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster); + bdrv_set_dirty_iter(dbi, + cluster * job->cluster_size / BDRV_SECTOR_SIZE); } last_cluster = cluster - 1; @@ -432,17 +432,15 @@ static void coroutine_fn backup_run(void *opaque) BackupBlockJob *job = opaque; BackupCompleteData *data; BlockDriverState *bs = blk_bs(job->common.blk); - int64_t start, end; + int64_t offset; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; QLIST_INIT(&job->inflight_reqs); qemu_co_rwlock_init(&job->flush_rwlock); - start = 0; - end = DIV_ROUND_UP(job->common.len, job->cluster_size); - - job->done_bitmap = bitmap_new(end); + job->done_bitmap = bitmap_new(DIV_ROUND_UP(job->common.len, + job->cluster_size)); job->before_write.notify = backup_before_write_notify; bdrv_add_before_write_notifier(bs, &job->before_write); @@ -457,7 +455,8 @@ static void coroutine_fn backup_run(void *opaque) ret = backup_run_incremental(job); } else { /* Both FULL and TOP SYNC_MODE's require copying.. */ - for (; start < end; start++) { + for (offset = 0; offset < job->common.len; + offset += job->cluster_size) { bool error_is_read; int alloced = 0; @@ -480,8 +479,8 @@ static void coroutine_fn backup_run(void *opaque) * needed but at some point that is always the case. 
*/ alloced = bdrv_is_allocated(bs, - start * sectors_per_cluster + i, - sectors_per_cluster - i, &n); + (offset >> BDRV_SECTOR_BITS) + i, + sectors_per_cluster - i, &n); i += n; if (alloced || n == 0) { @@ -499,9 +498,8 @@ static void coroutine_fn backup_run(void *opaque) if (alloced < 0) { ret = alloced; } else { - ret = backup_do_cow(job, start * job->cluster_size, - job->cluster_size, &error_is_read, - false); + ret = backup_do_cow(job, offset, job->cluster_size, + &error_is_read, false); } if (ret < 0) { /* Depending on error action, fail now or retry cluster */ @@ -510,7 +508,7 @@ static void coroutine_fn backup_run(void *opaque) if (action == BLOCK_ERROR_ACTION_REPORT) { break; } else { - start--; + offset -= job->cluster_size; continue; } } From d6a644bbfef81bb6c7ab11656ad71e326f75ac77 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:57 -0500 Subject: [PATCH 38/40] block: Make bdrv_is_allocated() byte-based We are gradually moving away from sector-based interfaces, towards byte-based. In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the signature of the function to use int64_t *pnum ensures that the compiler enforces that all callers are updated. For now, the io.c layer still assert()s that all callers are sector-aligned on input and that *pnum is sector-aligned on return to the caller, but that can be relaxed when a later patch implements byte-based block status. Therefore, this code adds usages like DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned values, where the call might reasonbly give non-aligned results in the future; on the other hand, no rounding is needed for callers that should just continue to work with byte alignment. For the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_is_allocated(). But some code, particularly bdrv_commit(), gets a lot simpler because it no longer has to mess with sectors; also, it is now possible to pass NULL if the caller does not care how much of the image is allocated beyond the initial offset. Leave comments where we can further simplify once a later patch eliminates the need for sector-aligned requests through bdrv_is_allocated(). For ease of review, bdrv_is_allocated_above() will be tackled separately. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/backup.c | 17 ++++------- block/commit.c | 21 ++++++------- block/io.c | 54 ++++++++++++++++++++++----------- block/stream.c | 7 +++-- block/vvfat.c | 34 ++++++++++++--------- include/block/block.h | 4 +-- migration/block.c | 16 ++++++---- qemu-img.c | 8 ++++- qemu-io-cmds.c | 70 ++++++++++++++++++++----------------------- 9 files changed, 126 insertions(+), 105 deletions(-) diff --git a/block/backup.c b/block/backup.c index 2bd1d94231..b69184eac5 100644 --- a/block/backup.c +++ b/block/backup.c @@ -47,12 +47,6 @@ typedef struct BackupBlockJob { QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; -/* Size of a cluster in sectors, instead of bytes. 
*/ -static inline int64_t cluster_size_sectors(BackupBlockJob *job) -{ - return job->cluster_size / BDRV_SECTOR_SIZE; -} - /* See if in-flight requests overlap and wait for them to complete */ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, int64_t start, @@ -433,7 +427,6 @@ static void coroutine_fn backup_run(void *opaque) BackupCompleteData *data; BlockDriverState *bs = blk_bs(job->common.blk); int64_t offset; - int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; QLIST_INIT(&job->inflight_reqs); @@ -465,12 +458,13 @@ static void coroutine_fn backup_run(void *opaque) } if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { - int i, n; + int i; + int64_t n; /* Check to see if these blocks are already in the * backing file. */ - for (i = 0; i < sectors_per_cluster;) { + for (i = 0; i < job->cluster_size;) { /* bdrv_is_allocated() only returns true/false based * on the first set of sectors it comes across that * are are all in the same state. @@ -478,9 +472,8 @@ static void coroutine_fn backup_run(void *opaque) * backup cluster length. We end up copying more than * needed but at some point that is always the case. */ alloced = - bdrv_is_allocated(bs, - (offset >> BDRV_SECTOR_BITS) + i, - sectors_per_cluster - i, &n); + bdrv_is_allocated(bs, offset + i, + job->cluster_size - i, &n); i += n; if (alloced || n == 0) { diff --git a/block/commit.c b/block/commit.c index c3a7bca20c..241aa95b3f 100644 --- a/block/commit.c +++ b/block/commit.c @@ -443,7 +443,7 @@ fail: } -#define COMMIT_BUF_SECTORS 2048 +#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE) /* commit COW file into the raw image */ int bdrv_commit(BlockDriverState *bs) @@ -452,8 +452,9 @@ int bdrv_commit(BlockDriverState *bs) BlockDriverState *backing_file_bs = NULL; BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; - int64_t sector, total_sectors, length, backing_length; - int n, ro, open_flags; + int64_t offset, length, backing_length; + int ro, open_flags; + int64_t n; int ret = 0; uint8_t *buf = NULL; Error *local_err = NULL; @@ -531,30 +532,26 @@ int bdrv_commit(BlockDriverState *bs) } } - total_sectors = length >> BDRV_SECTOR_BITS; - /* blk_try_blockalign() for src will choose an alignment that works for * backing as well, so no need to compare the alignment manually. 
*/ - buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); + buf = blk_try_blockalign(src, COMMIT_BUF_SIZE); if (buf == NULL) { ret = -ENOMEM; goto ro_cleanup; } - for (sector = 0; sector < total_sectors; sector += n) { - ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); + for (offset = 0; offset < length; offset += n) { + ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n); if (ret < 0) { goto ro_cleanup; } if (ret) { - ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf, - n * BDRV_SECTOR_SIZE); + ret = blk_pread(src, offset, buf, n); if (ret < 0) { goto ro_cleanup; } - ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf, - n * BDRV_SECTOR_SIZE, 0); + ret = blk_pwrite(backing, offset, buf, n, 0); if (ret < 0) { goto ro_cleanup; } diff --git a/block/io.c b/block/io.c index a0a36dfaa8..6656d7fe3e 100644 --- a/block/io.c +++ b/block/io.c @@ -1033,17 +1033,18 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, } if (flags & BDRV_REQ_COPY_ON_READ) { - int64_t start_sector = offset >> BDRV_SECTOR_BITS; - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - unsigned int nb_sectors = end_sector - start_sector; - int pnum; + /* TODO: Simplify further once bdrv_is_allocated no longer + * requires sector alignment */ + int64_t start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); + int64_t end = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE); + int64_t pnum; - ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, start, end - start, &pnum); if (ret < 0) { goto out; } - if (!ret || pnum != nb_sectors) { + if (!ret || pnum != end - start) { ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } @@ -1900,15 +1901,25 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, sector_num, nb_sectors, pnum, file); } -int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum) { BlockDriverState *file; - int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum, - &file); + int64_t sector_num = offset >> BDRV_SECTOR_BITS; + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int64_t ret; + int psectors; + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX); + ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors, + &file); if (ret < 0) { return ret; } + if (pnum) { + *pnum = psectors * BDRV_SECTOR_SIZE; + } return !!(ret & BDRV_BLOCK_ALLOCATED); } @@ -1917,7 +1928,8 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, * * Return true if the given sector is allocated in any image between * BASE and TOP (inclusive). BASE can be NULL to check if the given - * sector is allocated in any image of the chain. Return false otherwise. + * sector is allocated in any image of the chain. Return false otherwise, + * or negative errno on failure. 
* * 'pnum' is set to the number of sectors (including and immediately following * the specified sector) that are known to be in the same @@ -1934,13 +1946,19 @@ int bdrv_is_allocated_above(BlockDriverState *top, intermediate = top; while (intermediate && intermediate != base) { - int pnum_inter; - ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, + int64_t pnum_inter; + int psectors_inter; + + ret = bdrv_is_allocated(intermediate, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, &pnum_inter); if (ret < 0) { return ret; - } else if (ret) { - *pnum = pnum_inter; + } + assert(pnum_inter < INT_MAX * BDRV_SECTOR_SIZE); + psectors_inter = pnum_inter >> BDRV_SECTOR_BITS; + if (ret) { + *pnum = psectors_inter; return 1; } @@ -1950,10 +1968,10 @@ int bdrv_is_allocated_above(BlockDriverState *top, * * [sector_num+x, nr_sectors] allocated. */ - if (n > pnum_inter && + if (n > psectors_inter && (intermediate == top || - sector_num + pnum_inter < intermediate->total_sectors)) { - n = pnum_inter; + sector_num + psectors_inter < intermediate->total_sectors)) { + n = psectors_inter; } intermediate = backing_bs(intermediate); diff --git a/block/stream.c b/block/stream.c index e3dd2ac3c8..df9679c0fc 100644 --- a/block/stream.c +++ b/block/stream.c @@ -137,6 +137,7 @@ static void coroutine_fn stream_run(void *opaque) for ( ; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) { bool copy; + int64_t count = 0; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. @@ -148,8 +149,10 @@ static void coroutine_fn stream_run(void *opaque) copy = false; - ret = bdrv_is_allocated(bs, offset / BDRV_SECTOR_SIZE, - STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); + ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &count); + /* TODO relax this once bdrv_is_allocated does not enforce sectors */ + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; if (ret == 1) { /* Allocated in the top, no need to copy. 
*/ } else if (ret >= 0) { diff --git a/block/vvfat.c b/block/vvfat.c index f55104c338..4fd28e1e87 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1482,24 +1482,27 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, if (sector_num >= bs->total_sectors) return -1; if (s->qcow) { - int n; + int64_t n; int ret; - ret = bdrv_is_allocated(s->qcow->bs, sector_num, - nb_sectors - i, &n); + ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE, + (nb_sectors - i) * BDRV_SECTOR_SIZE, &n); if (ret < 0) { return ret; } if (ret) { - DLOG(fprintf(stderr, "sectors %d+%d allocated\n", - (int)sector_num, n)); - if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) { + DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 + " allocated\n", sector_num, + n >> BDRV_SECTOR_BITS)); + if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, + n >> BDRV_SECTOR_BITS)) { return -1; } - i += n - 1; - sector_num += n - 1; + i += (n >> BDRV_SECTOR_BITS) - 1; + sector_num += (n >> BDRV_SECTOR_BITS) - 1; continue; } -DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); + DLOG(fprintf(stderr, "sector %" PRId64 " not allocated\n", + sector_num)); } if (sector_num < s->offset_to_root_dir) { if (sector_num < s->offset_to_fat) { @@ -1779,7 +1782,7 @@ static inline bool cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num) { int was_modified = 0; - int i, dummy; + int i; if (s->qcow == NULL) { return 0; @@ -1787,8 +1790,9 @@ static inline bool cluster_was_modified(BDRVVVFATState *s, for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) { was_modified = bdrv_is_allocated(s->qcow->bs, - cluster2sector(s, cluster_num) + i, - 1, &dummy); + (cluster2sector(s, cluster_num) + + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); } /* @@ -1935,7 +1939,7 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, } if (copy_it) { - int i, dummy; + int i; /* * This is horribly inefficient, but that is okay, since * it is rarely executed, if at all. 
@@ -1946,7 +1950,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, for (i = 0; i < s->sectors_per_cluster; i++) { int res; - res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy); + res = bdrv_is_allocated(s->qcow->bs, + (offset + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); if (res < 0) { return -1; } diff --git a/include/block/block.h b/include/block/block.h index 121f42becf..8d16cc1d98 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -427,8 +427,8 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file); -int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum); +int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t sector_num, int nb_sectors, int *pnum); diff --git a/migration/block.c b/migration/block.c index 7674ae1078..86c0b96cd1 100644 --- a/migration/block.c +++ b/migration/block.c @@ -34,7 +34,7 @@ #define BLK_MIG_FLAG_PROGRESS 0x04 #define BLK_MIG_FLAG_ZERO_BLOCK 0x08 -#define MAX_IS_ALLOCATED_SEARCH 65536 +#define MAX_IS_ALLOCATED_SEARCH (65536 * BDRV_SECTOR_SIZE) #define MAX_INFLIGHT_IO 512 @@ -267,16 +267,20 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) BlockBackend *bb = bmds->blk; BlkMigBlock *blk; int nr_sectors; + int64_t count; if (bmds->shared_base) { qemu_mutex_lock_iothread(); aio_context_acquire(blk_get_aio_context(bb)); - /* Skip unallocated sectors; intentionally treats failure as - * an allocated sector */ + /* Skip unallocated sectors; intentionally treats failure or + * partial sector as an allocated sector */ while (cur_sector < total_sectors && - !bdrv_is_allocated(blk_bs(bb), cur_sector, - MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { - cur_sector += nr_sectors; + !bdrv_is_allocated(blk_bs(bb), cur_sector * BDRV_SECTOR_SIZE, + MAX_IS_ALLOCATED_SEARCH, &count)) { + if (count < BDRV_SECTOR_SIZE) { + break; + } + cur_sector += count >> BDRV_SECTOR_BITS; } aio_context_release(blk_get_aio_context(bb)); qemu_mutex_unlock_iothread(); diff --git a/qemu-img.c b/qemu-img.c index c5f00db050..86e24335fc 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3258,6 +3258,7 @@ static int img_rebase(int argc, char **argv) int64_t new_backing_num_sectors = 0; uint64_t sector; int n; + int64_t count; float local_progress = 0; buf_old = blk_blockalign(blk, IO_BUF_SIZE); @@ -3305,12 +3306,17 @@ static int img_rebase(int argc, char **argv) } /* If the cluster is allocated, we don't need to take action */ - ret = bdrv_is_allocated(bs, sector, n, &n); + ret = bdrv_is_allocated(bs, sector << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &count); if (ret < 0) { error_report("error while reading image metadata: %s", strerror(-ret)); goto out; } + /* TODO relax this once bdrv_is_allocated does not enforce + * sector alignment */ + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; if (ret) { continue; } diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index b0ea327024..f4fdf2dcce 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1760,12 +1760,12 @@ out: static int alloc_f(BlockBackend *blk, int argc, char **argv) { BlockDriverState *bs = blk_bs(blk); - int64_t offset, sector_num, nb_sectors, remaining, count; + int64_t offset, start, remaining, count; char s1[64]; - int num, ret; - int64_t sum_alloc; + int ret; + int64_t num, sum_alloc; - offset = cvtnum(argv[1]); + 
start = offset = cvtnum(argv[1]); if (offset < 0) { print_cvtnum_err(offset, argv[1]); return 0; @@ -1793,32 +1793,30 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) count); return 0; } - nb_sectors = count >> BDRV_SECTOR_BITS; - remaining = nb_sectors; + remaining = count; sum_alloc = 0; - sector_num = offset >> 9; while (remaining) { - ret = bdrv_is_allocated(bs, sector_num, remaining, &num); + ret = bdrv_is_allocated(bs, offset, remaining, &num); if (ret < 0) { printf("is_allocated failed: %s\n", strerror(-ret)); return 0; } - sector_num += num; + offset += num; remaining -= num; if (ret) { sum_alloc += num; } if (num == 0) { - nb_sectors -= remaining; + count -= remaining; remaining = 0; } } - cvtstr(offset, s1, sizeof(s1)); + cvtstr(start, s1, sizeof(s1)); printf("%"PRId64"/%"PRId64" bytes allocated at offset %s\n", - sum_alloc << BDRV_SECTOR_BITS, nb_sectors << BDRV_SECTOR_BITS, s1); + sum_alloc, count, s1); return 0; } @@ -1833,14 +1831,15 @@ static const cmdinfo_t alloc_cmd = { }; -static int map_is_allocated(BlockDriverState *bs, int64_t sector_num, - int64_t nb_sectors, int64_t *pnum) +static int map_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum) { - int num, num_checked; + int64_t num; + int num_checked; int ret, firstret; - num_checked = MIN(nb_sectors, INT_MAX); - ret = bdrv_is_allocated(bs, sector_num, num_checked, &num); + num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES); + ret = bdrv_is_allocated(bs, offset, num_checked, &num); if (ret < 0) { return ret; } @@ -1848,12 +1847,12 @@ static int map_is_allocated(BlockDriverState *bs, int64_t sector_num, firstret = ret; *pnum = num; - while (nb_sectors > 0 && ret == firstret) { - sector_num += num; - nb_sectors -= num; + while (bytes > 0 && ret == firstret) { + offset += num; + bytes -= num; - num_checked = MIN(nb_sectors, INT_MAX); - ret = bdrv_is_allocated(bs, sector_num, num_checked, &num); + num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES); + ret = bdrv_is_allocated(bs, offset, num_checked, &num); if (ret == firstret && num) { *pnum += num; } else { @@ -1866,25 +1865,21 @@ static int map_is_allocated(BlockDriverState *bs, int64_t sector_num, static int map_f(BlockBackend *blk, int argc, char **argv) { - int64_t offset; - int64_t nb_sectors, total_sectors; + int64_t offset, bytes; char s1[64], s2[64]; int64_t num; int ret; const char *retstr; offset = 0; - total_sectors = blk_nb_sectors(blk); - if (total_sectors < 0) { - error_report("Failed to query image length: %s", - strerror(-total_sectors)); + bytes = blk_getlength(blk); + if (bytes < 0) { + error_report("Failed to query image length: %s", strerror(-bytes)); return 0; } - nb_sectors = total_sectors; - - do { - ret = map_is_allocated(blk_bs(blk), offset, nb_sectors, &num); + while (bytes) { + ret = map_is_allocated(blk_bs(blk), offset, bytes, &num); if (ret < 0) { error_report("Failed to get allocation status: %s", strerror(-ret)); return 0; @@ -1894,15 +1889,14 @@ static int map_f(BlockBackend *blk, int argc, char **argv) } retstr = ret ? 
" allocated" : "not allocated"; - cvtstr(num << BDRV_SECTOR_BITS, s1, sizeof(s1)); - cvtstr(offset << BDRV_SECTOR_BITS, s2, sizeof(s2)); + cvtstr(num, s1, sizeof(s1)); + cvtstr(offset, s2, sizeof(s2)); printf("%s (0x%" PRIx64 ") bytes %s at offset %s (0x%" PRIx64 ")\n", - s1, num << BDRV_SECTOR_BITS, retstr, - s2, offset << BDRV_SECTOR_BITS); + s1, num, retstr, s2, offset); offset += num; - nb_sectors -= num; - } while (offset < total_sectors); + bytes -= num; + } return 0; } From c00716beb30ba996bd6fdfd5f41bb07e4414144f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:58 -0500 Subject: [PATCH 39/40] block: Minimize raw use of bds->total_sectors bdrv_is_allocated_above() was relying on intermediate->total_sectors, which is a field that can have stale contents depending on the value of intermediate->has_variable_length. An audit shows that we are safe (we were first calling through bdrv_co_get_block_status() which in turn calls bdrv_nb_sectors() and therefore just refreshed the current length), but it's nicer to favor our accessor functions to avoid having to repeat such an audit, even if it means refresh_total_sectors() is called more frequently. Suggested-by: John Snow Signed-off-by: Eric Blake Reviewed-by: Manos Pitsidianakis Reviewed-by: Jeff Cody Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- block/io.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/block/io.c b/block/io.c index 6656d7fe3e..aad7fd30a4 100644 --- a/block/io.c +++ b/block/io.c @@ -1947,6 +1947,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, intermediate = top; while (intermediate && intermediate != base) { int64_t pnum_inter; + int64_t size_inter; int psectors_inter; ret = bdrv_is_allocated(intermediate, sector_num * BDRV_SECTOR_SIZE, @@ -1962,15 +1963,12 @@ int bdrv_is_allocated_above(BlockDriverState *top, return 1; } - /* - * [sector_num, nb_sectors] is unallocated on top but intermediate - * might have - * - * [sector_num+x, nr_sectors] allocated. - */ + size_inter = bdrv_nb_sectors(intermediate); + if (size_inter < 0) { + return size_inter; + } if (n > psectors_inter && - (intermediate == top || - sector_num + psectors_inter < intermediate->total_sectors)) { + (intermediate == top || sector_num + psectors_inter < size_inter)) { n = psectors_inter; } From 51b0a488882328f8f02519bb47ca7e0e7fbe12ff Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 7 Jul 2017 07:44:59 -0500 Subject: [PATCH 40/40] block: Make bdrv_is_allocated_above() byte-based We are gradually moving away from sector-based interfaces, towards byte-based. In the common case, allocation is unlikely to ever use values that are not naturally sector-aligned, but it is possible that byte-based values will let us be more precise about allocation at the end of an unaligned file that can do byte-based access. Changing the signature of the function to use int64_t *pnum ensures that the compiler enforces that all callers are updated. For now, the io.c layer still assert()s that all callers are sector-aligned, but that can be relaxed when a later patch implements byte-based block status. Therefore, for the most part this patch is just the addition of scaling at the callers followed by inverse scaling at bdrv_is_allocated(). But some code, particularly stream_run(), gets a lot simpler because it no longer has to mess with sectors. Leave comments where we can further simplify by switching to byte-based iterations, once later patches eliminate the need for sector-aligned operations. 
For ease of review, bdrv_is_allocated() was tackled separately. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/commit.c | 20 ++++++++------------ block/io.c | 38 ++++++++++++++++++-------------------- block/mirror.c | 8 +++++++- block/replication.c | 17 ++++++++++++----- block/stream.c | 23 +++++++++-------------- include/block/block.h | 2 +- qemu-img.c | 13 ++++++++++--- 7 files changed, 65 insertions(+), 56 deletions(-) diff --git a/block/commit.c b/block/commit.c index 241aa95b3f..774a8a599c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -146,7 +146,7 @@ static void coroutine_fn commit_run(void *opaque) int64_t offset; uint64_t delay_ns = 0; int ret = 0; - int n = 0; /* sectors */ + int64_t n = 0; /* bytes */ void *buf = NULL; int bytes_written = 0; int64_t base_len; @@ -171,7 +171,7 @@ static void coroutine_fn commit_run(void *opaque) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); - for (offset = 0; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) { + for (offset = 0; offset < s->common.len; offset += n) { bool copy; /* Note that even when no rate limit is applied we need to yield @@ -183,15 +183,12 @@ static void coroutine_fn commit_run(void *opaque) } /* Copy if allocated above the base */ ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), - offset / BDRV_SECTOR_SIZE, - COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, - &n); + offset, COMMIT_BUFFER_SIZE, &n); copy = (ret == 1); - trace_commit_one_iteration(s, offset, n * BDRV_SECTOR_SIZE, ret); + trace_commit_one_iteration(s, offset, n, ret); if (copy) { - ret = commit_populate(s->top, s->base, offset, - n * BDRV_SECTOR_SIZE, buf); - bytes_written += n * BDRV_SECTOR_SIZE; + ret = commit_populate(s->top, s->base, offset, n, buf); + bytes_written += n; } if (ret < 0) { BlockErrorAction action = @@ -204,11 +201,10 @@ static void coroutine_fn commit_run(void *opaque) } } /* Publish progress */ - s->common.offset += n * BDRV_SECTOR_SIZE; + s->common.offset += n; if (copy && s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, - n * BDRV_SECTOR_SIZE); + delay_ns = ratelimit_calculate_delay(&s->limit, n); } } diff --git a/block/io.c b/block/io.c index aad7fd30a4..23170a57ee 100644 --- a/block/io.c +++ b/block/io.c @@ -1926,50 +1926,48 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * - * Return true if the given sector is allocated in any image between - * BASE and TOP (inclusive). BASE can be NULL to check if the given - * sector is allocated in any image of the chain. Return false otherwise, + * Return true if (a prefix of) the given range is allocated in any image + * between BASE and TOP (inclusive). BASE can be NULL to check if the given + * offset is allocated in any image of the chain. Return false otherwise, * or negative errno on failure. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same - * allocated/unallocated state. + * 'pnum' is set to the number of bytes (including and immediately + * following the specified offset) that are known to be in the same + * allocated/unallocated state. Note that a subsequent call starting + * at 'offset + *pnum' may return the same allocation status (in other + * words, the result is not necessarily the maximum possible range); + * but 'pnum' will only be 0 when end of file is reached. 
* */ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum) + int64_t offset, int64_t bytes, int64_t *pnum) { BlockDriverState *intermediate; - int ret, n = nb_sectors; + int ret; + int64_t n = bytes; intermediate = top; while (intermediate && intermediate != base) { int64_t pnum_inter; int64_t size_inter; - int psectors_inter; - ret = bdrv_is_allocated(intermediate, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, - &pnum_inter); + ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter); if (ret < 0) { return ret; } - assert(pnum_inter < INT_MAX * BDRV_SECTOR_SIZE); - psectors_inter = pnum_inter >> BDRV_SECTOR_BITS; if (ret) { - *pnum = psectors_inter; + *pnum = pnum_inter; return 1; } - size_inter = bdrv_nb_sectors(intermediate); + size_inter = bdrv_getlength(intermediate); if (size_inter < 0) { return size_inter; } - if (n > psectors_inter && - (intermediate == top || sector_num + psectors_inter < size_inter)) { - n = psectors_inter; + if (n > pnum_inter && + (intermediate == top || offset + pnum_inter < size_inter)) { + n = pnum_inter; } intermediate = backing_bs(intermediate); diff --git a/block/mirror.c b/block/mirror.c index b33f4bb1d6..eaf0fe7858 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -621,6 +621,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; + int64_t count; end = s->bdev_length / BDRV_SECTOR_SIZE; @@ -670,11 +671,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } - ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); + ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, &count); if (ret < 0) { return ret; } + /* TODO: Relax this once bdrv_is_allocated_above and dirty + * bitmaps no longer require sector alignment. */ + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; assert(n > 0); if (ret == 1) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); diff --git a/block/replication.c b/block/replication.c index 8f3aba7a20..bf4462c8e7 100644 --- a/block/replication.c +++ b/block/replication.c @@ -264,7 +264,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, BdrvChild *top = bs->file; BdrvChild *base = s->secondary_disk; BdrvChild *target; - int ret, n; + int ret; + int64_t n; ret = replication_get_io_status(s); if (ret < 0) { @@ -283,14 +284,20 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, */ qemu_iovec_init(&hd_qiov, qiov->niov); while (remaining_sectors > 0) { - ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num, - remaining_sectors, &n); + int64_t count; + + ret = bdrv_is_allocated_above(top->bs, base->bs, + sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, + &count); if (ret < 0) { goto out1; } + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + n = count >> BDRV_SECTOR_BITS; qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); target = ret ? 
top : base; ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); @@ -300,7 +307,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, remaining_sectors -= n; sector_num += n; - bytes_done += n * BDRV_SECTOR_SIZE; + bytes_done += count; } out1: diff --git a/block/stream.c b/block/stream.c index df9679c0fc..e6f72346e5 100644 --- a/block/stream.c +++ b/block/stream.c @@ -111,7 +111,7 @@ static void coroutine_fn stream_run(void *opaque) uint64_t delay_ns = 0; int error = 0; int ret = 0; - int n = 0; /* sectors */ + int64_t n = 0; /* bytes */ void *buf; if (!bs->backing) { @@ -135,9 +135,8 @@ static void coroutine_fn stream_run(void *opaque) bdrv_enable_copy_on_read(bs); } - for ( ; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) { + for ( ; offset < s->common.len; offset += n) { bool copy; - int64_t count = 0; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. @@ -149,28 +148,25 @@ static void coroutine_fn stream_run(void *opaque) copy = false; - ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &count); - /* TODO relax this once bdrv_is_allocated does not enforce sectors */ - assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); - n = count >> BDRV_SECTOR_BITS; + ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ ret = bdrv_is_allocated_above(backing_bs(bs), base, - offset / BDRV_SECTOR_SIZE, n, &n); + offset, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { - n = (s->common.len - offset) / BDRV_SECTOR_SIZE; + n = s->common.len - offset; } copy = (ret == 1); } - trace_stream_one_iteration(s, offset, n * BDRV_SECTOR_SIZE, ret); + trace_stream_one_iteration(s, offset, n, ret); if (copy) { - ret = stream_populate(blk, offset, n * BDRV_SECTOR_SIZE, buf); + ret = stream_populate(blk, offset, n, buf); } if (ret < 0) { BlockErrorAction action = @@ -189,10 +185,9 @@ static void coroutine_fn stream_run(void *opaque) ret = 0; /* Publish progress */ - s->common.offset += n * BDRV_SECTOR_SIZE; + s->common.offset += n; if (copy && s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, - n * BDRV_SECTOR_SIZE); + delay_ns = ratelimit_calculate_delay(&s->limit, n); } } diff --git a/include/block/block.h b/include/block/block.h index 8d16cc1d98..4a2725267d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -430,7 +430,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t sector_num, int nb_sectors, int *pnum); + int64_t offset, int64_t bytes, int64_t *pnum); bool bdrv_is_read_only(BlockDriverState *bs); bool bdrv_is_writable(BlockDriverState *bs); diff --git a/qemu-img.c b/qemu-img.c index 86e24335fc..f7ffb79db6 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1508,12 +1508,16 @@ static int img_compare(int argc, char **argv) } for (;;) { + int64_t count; + nb_sectors = sectors_to_process(total_sectors_over, sector_num); if (nb_sectors <= 0) { break; } - ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num, - nb_sectors, &pnum); + ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, + sector_num * BDRV_SECTOR_SIZE, + nb_sectors 
* BDRV_SECTOR_SIZE, + &count); if (ret < 0) { ret = 3; error_report("Sector allocation test failed for %s", @@ -1521,7 +1525,10 @@ static int img_compare(int argc, char **argv) goto out; } - nb_sectors = pnum; + /* TODO relax this once bdrv_is_allocated_above does not enforce + * sector alignment */ + assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); + nb_sectors = count >> BDRV_SECTOR_BITS; if (ret) { ret = check_empty_sectors(blk_over, sector_num, nb_sectors, filename_over, buf1, quiet);