Skip to content

Commit d53ec3d

Browse files
bertogg authored and XanClic committed
qcow2: Add subcluster support to calculate_l2_meta()
If an image has subclusters then there are more copy-on-write
scenarios that we need to consider. Let's say we have a write request
from the middle of subcluster #3 until the end of the cluster:

1) If we are writing to a newly allocated cluster then we need
   copy-on-write. The previous contents of subclusters #0 to #3 must
   be copied to the new cluster. We can optimize this process by
   skipping all leading unallocated or zero subclusters (the status of
   those skipped subclusters will be reflected in the new L2 bitmap).

2) If we are overwriting an existing cluster:

   2.1) If subcluster #3 is unallocated or has the all-zeroes bit set
        then we need copy-on-write (on subcluster #3 only).

   2.2) If subcluster #3 was already allocated then there is no need
        for any copy-on-write. However we still need to update the L2
        bitmap to reflect possible changes in the allocation status of
        subclusters #4 to #31. Because of this, this function checks
        if all the overwritten subclusters are already allocated and
        in this case it returns without creating a new QCowL2Meta
        structure.

After all these changes l2meta_cow_start() and l2meta_cow_end()
are not necessarily cluster-aligned anymore. We need to update the
calculation of old_start and old_end in handle_dependencies() to
guarantee that no two requests try to write on the same cluster.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <[email protected]>
Reviewed-by: Max Reitz <[email protected]>
Message-Id: <4292dd56e4446d386a2fe307311737a711c00708.1594396418.git.berto@igalia.com>
Signed-off-by: Max Reitz <[email protected]>
1 parent 97490a1 commit d53ec3d

File tree

1 file changed

+133
-34
lines changed

1 file changed

+133
-34
lines changed

block/qcow2-cluster.c

+133-34
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ static int l2_allocate(BlockDriverState *bs, int l1_index)
387387
* If the L2 entry is invalid return -errno and set @type to
388388
* QCOW2_SUBCLUSTER_INVALID.
389389
*/
390-
G_GNUC_UNUSED
391390
static int qcow2_get_subcluster_range_type(BlockDriverState *bs,
392391
uint64_t l2_entry,
393392
uint64_t l2_bitmap,
@@ -1111,56 +1110,148 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
11111110
* If @keep_old is true it means that the clusters were already
11121111
* allocated and will be overwritten. If false then the clusters are
11131112
* new and we have to decrease the reference count of the old ones.
1113+
*
1114+
* Returns 0 on success, -errno on failure.
11141115
*/
1115-
static void calculate_l2_meta(BlockDriverState *bs,
1116-
uint64_t host_cluster_offset,
1117-
uint64_t guest_offset, unsigned bytes,
1118-
uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
1116+
static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
1117+
uint64_t guest_offset, unsigned bytes,
1118+
uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
11191119
{
11201120
BDRVQcow2State *s = bs->opaque;
1121-
int l2_index = offset_to_l2_slice_index(s, guest_offset);
1122-
uint64_t l2_entry;
1121+
int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
1122+
uint64_t l2_entry, l2_bitmap;
11231123
unsigned cow_start_from, cow_end_to;
11241124
unsigned cow_start_to = offset_into_cluster(s, guest_offset);
11251125
unsigned cow_end_from = cow_start_to + bytes;
11261126
unsigned nb_clusters = size_to_clusters(s, cow_end_from);
11271127
QCowL2Meta *old_m = *m;
1128-
QCow2ClusterType type;
1128+
QCow2SubclusterType type;
1129+
int i;
1130+
bool skip_cow = keep_old;
11291131

11301132
assert(nb_clusters <= s->l2_slice_size - l2_index);
11311133

1132-
/* Return if there's no COW (all clusters are normal and we keep them) */
1133-
if (keep_old) {
1134-
int i;
1135-
for (i = 0; i < nb_clusters; i++) {
1136-
l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
1137-
if (qcow2_get_cluster_type(bs, l2_entry) != QCOW2_CLUSTER_NORMAL) {
1138-
break;
1134+
/* Check the type of all affected subclusters */
1135+
for (i = 0; i < nb_clusters; i++) {
1136+
l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
1137+
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
1138+
if (skip_cow) {
1139+
unsigned write_from = MAX(cow_start_to, i << s->cluster_bits);
1140+
unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits);
1141+
int first_sc = offset_to_sc_index(s, write_from);
1142+
int last_sc = offset_to_sc_index(s, write_to - 1);
1143+
int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
1144+
first_sc, &type);
1145+
/* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */
1146+
if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) {
1147+
skip_cow = false;
11391148
}
1149+
} else {
1150+
/* If we can't skip the cow we can still look for invalid entries */
1151+
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0);
11401152
}
1141-
if (i == nb_clusters) {
1142-
return;
1153+
if (type == QCOW2_SUBCLUSTER_INVALID) {
1154+
int l1_index = offset_to_l1_index(s, guest_offset);
1155+
uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
1156+
qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster "
1157+
"entry found (L2 offset: %#" PRIx64
1158+
", L2 index: %#x)",
1159+
l2_offset, l2_index + i);
1160+
return -EIO;
11431161
}
11441162
}
11451163

1164+
if (skip_cow) {
1165+
return 0;
1166+
}
1167+
11461168
/* Get the L2 entry of the first cluster */
11471169
l2_entry = get_l2_entry(s, l2_slice, l2_index);
1148-
type = qcow2_get_cluster_type(bs, l2_entry);
1149-
1150-
if (type == QCOW2_CLUSTER_NORMAL && keep_old) {
1151-
cow_start_from = cow_start_to;
1170+
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
1171+
sc_index = offset_to_sc_index(s, guest_offset);
1172+
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
1173+
1174+
if (!keep_old) {
1175+
switch (type) {
1176+
case QCOW2_SUBCLUSTER_COMPRESSED:
1177+
cow_start_from = 0;
1178+
break;
1179+
case QCOW2_SUBCLUSTER_NORMAL:
1180+
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1181+
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1182+
if (has_subclusters(s)) {
1183+
/* Skip all leading zero and unallocated subclusters */
1184+
uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
1185+
cow_start_from =
1186+
MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits;
1187+
} else {
1188+
cow_start_from = 0;
1189+
}
1190+
break;
1191+
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
1192+
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
1193+
cow_start_from = sc_index << s->subcluster_bits;
1194+
break;
1195+
default:
1196+
g_assert_not_reached();
1197+
}
11521198
} else {
1153-
cow_start_from = 0;
1199+
switch (type) {
1200+
case QCOW2_SUBCLUSTER_NORMAL:
1201+
cow_start_from = cow_start_to;
1202+
break;
1203+
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1204+
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1205+
cow_start_from = sc_index << s->subcluster_bits;
1206+
break;
1207+
default:
1208+
g_assert_not_reached();
1209+
}
11541210
}
11551211

11561212
/* Get the L2 entry of the last cluster */
1157-
l2_entry = get_l2_entry(s, l2_slice, l2_index + nb_clusters - 1);
1158-
type = qcow2_get_cluster_type(bs, l2_entry);
1159-
1160-
if (type == QCOW2_CLUSTER_NORMAL && keep_old) {
1161-
cow_end_to = cow_end_from;
1213+
l2_index += nb_clusters - 1;
1214+
l2_entry = get_l2_entry(s, l2_slice, l2_index);
1215+
l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
1216+
sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
1217+
type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
1218+
1219+
if (!keep_old) {
1220+
switch (type) {
1221+
case QCOW2_SUBCLUSTER_COMPRESSED:
1222+
cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
1223+
break;
1224+
case QCOW2_SUBCLUSTER_NORMAL:
1225+
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1226+
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1227+
cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
1228+
if (has_subclusters(s)) {
1229+
/* Skip all trailing zero and unallocated subclusters */
1230+
uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
1231+
cow_end_to -=
1232+
MIN(s->subclusters_per_cluster - sc_index - 1,
1233+
clz32(alloc_bitmap)) << s->subcluster_bits;
1234+
}
1235+
break;
1236+
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
1237+
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
1238+
cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
1239+
break;
1240+
default:
1241+
g_assert_not_reached();
1242+
}
11621243
} else {
1163-
cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
1244+
switch (type) {
1245+
case QCOW2_SUBCLUSTER_NORMAL:
1246+
cow_end_to = cow_end_from;
1247+
break;
1248+
case QCOW2_SUBCLUSTER_ZERO_ALLOC:
1249+
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
1250+
cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
1251+
break;
1252+
default:
1253+
g_assert_not_reached();
1254+
}
11641255
}
11651256

11661257
*m = g_malloc0(sizeof(**m));
@@ -1185,6 +1276,8 @@ static void calculate_l2_meta(BlockDriverState *bs,
11851276

11861277
qemu_co_queue_init(&(*m)->dependent_requests);
11871278
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
1279+
1280+
return 0;
11881281
}
11891282

11901283
/*
@@ -1273,8 +1366,8 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
12731366

12741367
uint64_t start = guest_offset;
12751368
uint64_t end = start + bytes;
1276-
uint64_t old_start = l2meta_cow_start(old_alloc);
1277-
uint64_t old_end = l2meta_cow_end(old_alloc);
1369+
uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc));
1370+
uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size);
12781371

12791372
if (end <= old_start || start >= old_end) {
12801373
/* No intersection */
@@ -1399,8 +1492,11 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
13991492
- offset_into_cluster(s, guest_offset));
14001493
assert(*bytes != 0);
14011494

1402-
calculate_l2_meta(bs, cluster_offset, guest_offset,
1403-
*bytes, l2_slice, m, true);
1495+
ret = calculate_l2_meta(bs, cluster_offset, guest_offset,
1496+
*bytes, l2_slice, m, true);
1497+
if (ret < 0) {
1498+
goto out;
1499+
}
14041500

14051501
ret = 1;
14061502
} else {
@@ -1576,8 +1672,11 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
15761672
*bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
15771673
assert(*bytes != 0);
15781674

1579-
calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, l2_slice,
1580-
m, false);
1675+
ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
1676+
l2_slice, m, false);
1677+
if (ret < 0) {
1678+
goto out;
1679+
}
15811680

15821681
ret = 1;
15831682

0 commit comments

Comments
 (0)