Skip to content

Commit 0956ee3

Browse files
committed
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180710a' into staging
Migration pull 2018-07-10 (for 3.0)

Migration fixes and migration test fixes, mostly around postcopy and postcopy recovery

# gpg: Signature made Tue 10 Jul 2018 16:27:19 BST
# gpg: using RSA key 0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <[email protected]>"
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20180710a:
  migration: reorder MIG_CMD_POSTCOPY_RESUME
  tests: hide stderr for postcopy recovery test
  tests: add postcopy recovery test
  tests: introduce wait_for_migration_status()
  tests: introduce migrate_query*() helpers
  tests: allow migrate() to take extra flags
  tests: introduce migrate_postcopy_* helpers
  migration: show pause/recover state on dst host
  migration: fix incorrect bitmap size calculation
  migration: loosen recovery check when load vm
  migration: simplify check to use qemu file buffer
  migration: unify incoming processing
  migration: unbreak postcopy recovery
  migration: move income process out of multifd
  migration: delay postcopy paused state

Signed-off-by: Peter Maydell <[email protected]>
2 parents 2b83714 + 858b6d6 commit 0956ee3

File tree

8 files changed

+238
-88
lines changed

8 files changed

+238
-88
lines changed

migration/exec.c

-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc,
4949
{
5050
migration_channel_process_incoming(ioc);
5151
object_unref(OBJECT(ioc));
52-
if (!migrate_use_multifd()) {
53-
migration_incoming_process();
54-
}
5552
return G_SOURCE_REMOVE;
5653
}
5754

migration/fd.c

-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
4949
{
5050
migration_channel_process_incoming(ioc);
5151
object_unref(OBJECT(ioc));
52-
if (!migrate_use_multifd()) {
53-
migration_incoming_process();
54-
}
5552
return G_SOURCE_REMOVE;
5653
}
5754

migration/migration.c

+39-7
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,8 @@ void migration_incoming_process(void)
466466
qemu_coroutine_enter(co);
467467
}
468468

469-
void migration_fd_process_incoming(QEMUFile *f)
469+
/* Returns true if recovered from a paused migration, otherwise false */
470+
static bool postcopy_try_recover(QEMUFile *f)
470471
{
471472
MigrationIncomingState *mis = migration_incoming_get_current();
472473

@@ -491,23 +492,52 @@ void migration_fd_process_incoming(QEMUFile *f)
491492
* that source is ready to reply to page requests.
492493
*/
493494
qemu_sem_post(&mis->postcopy_pause_sem_dst);
494-
} else {
495-
/* New incoming migration */
496-
migration_incoming_setup(f);
497-
migration_incoming_process();
495+
return true;
496+
}
497+
498+
return false;
499+
}
500+
501+
void migration_fd_process_incoming(QEMUFile *f)
502+
{
503+
if (postcopy_try_recover(f)) {
504+
return;
498505
}
506+
507+
migration_incoming_setup(f);
508+
migration_incoming_process();
499509
}
500510

501511
void migration_ioc_process_incoming(QIOChannel *ioc)
502512
{
503513
MigrationIncomingState *mis = migration_incoming_get_current();
514+
bool start_migration;
504515

505516
if (!mis->from_src_file) {
517+
/* The first connection (multifd may have multiple) */
506518
QEMUFile *f = qemu_fopen_channel_input(ioc);
519+
520+
/* If it's a recovery, we're done */
521+
if (postcopy_try_recover(f)) {
522+
return;
523+
}
524+
507525
migration_incoming_setup(f);
508-
return;
526+
527+
/*
528+
* Common migration only needs one channel, so we can start
529+
* right now. Multifd needs more than one channel, we wait.
530+
*/
531+
start_migration = !migrate_use_multifd();
532+
} else {
533+
/* Multiple connections */
534+
assert(migrate_use_multifd());
535+
start_migration = multifd_recv_new_channel(ioc);
536+
}
537+
538+
if (start_migration) {
539+
migration_incoming_process();
509540
}
510-
multifd_recv_new_channel(ioc);
511541
}
512542

513543
/**
@@ -881,6 +911,8 @@ static void fill_destination_migration_info(MigrationInfo *info)
881911
case MIGRATION_STATUS_CANCELLED:
882912
case MIGRATION_STATUS_ACTIVE:
883913
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
914+
case MIGRATION_STATUS_POSTCOPY_PAUSED:
915+
case MIGRATION_STATUS_POSTCOPY_RECOVER:
884916
case MIGRATION_STATUS_FAILED:
885917
case MIGRATION_STATUS_COLO:
886918
info->has_status = true;

migration/ram.c

+18-14
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
235235
bitmap_to_le(le_bitmap, block->receivedmap, nbits);
236236

237237
/* Size of the bitmap, in bytes */
238-
size = nbits / 8;
238+
size = DIV_ROUND_UP(nbits, 8);
239239

240240
/*
241241
* size is always aligned to 8 bytes for 64bit machines, but it
@@ -1311,7 +1311,8 @@ bool multifd_recv_all_channels_created(void)
13111311
return thread_count == atomic_read(&multifd_recv_state->count);
13121312
}
13131313

1314-
void multifd_recv_new_channel(QIOChannel *ioc)
1314+
/* Return true if multifd is ready for the migration, otherwise false */
1315+
bool multifd_recv_new_channel(QIOChannel *ioc)
13151316
{
13161317
MultiFDRecvParams *p;
13171318
Error *local_err = NULL;
@@ -1320,15 +1321,15 @@ void multifd_recv_new_channel(QIOChannel *ioc)
13201321
id = multifd_recv_initial_packet(ioc, &local_err);
13211322
if (id < 0) {
13221323
multifd_recv_terminate_threads(local_err);
1323-
return;
1324+
return false;
13241325
}
13251326

13261327
p = &multifd_recv_state->params[id];
13271328
if (p->c != NULL) {
13281329
error_setg(&local_err, "multifd: received id '%d' already setup'",
13291330
id);
13301331
multifd_recv_terminate_threads(local_err);
1331-
return;
1332+
return false;
13321333
}
13331334
p->c = ioc;
13341335
object_ref(OBJECT(ioc));
@@ -1339,9 +1340,7 @@ void multifd_recv_new_channel(QIOChannel *ioc)
13391340
qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
13401341
QEMU_THREAD_JOINABLE);
13411342
atomic_inc(&multifd_recv_state->count);
1342-
if (multifd_recv_state->count == migrate_multifd_channels()) {
1343-
migration_incoming_process();
1344-
}
1343+
return multifd_recv_state->count == migrate_multifd_channels();
13451344
}
13461345

13471346
/**
@@ -3581,7 +3580,7 @@ static int ram_load_postcopy(QEMUFile *f)
35813580
{
35823581
int flags = 0, ret = 0;
35833582
bool place_needed = false;
3584-
bool matching_page_sizes = false;
3583+
bool matches_target_page_size = false;
35853584
MigrationIncomingState *mis = migration_incoming_get_current();
35863585
/* Temporary page that is later 'placed' */
35873586
void *postcopy_host_page = postcopy_get_tmp_page(mis);
@@ -3621,7 +3620,7 @@ static int ram_load_postcopy(QEMUFile *f)
36213620
ret = -EINVAL;
36223621
break;
36233622
}
3624-
matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
3623+
matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
36253624
/*
36263625
* Postcopy requires that we place whole host pages atomically;
36273626
* these may be huge pages for RAMBlocks that are backed by
@@ -3669,12 +3668,17 @@ static int ram_load_postcopy(QEMUFile *f)
36693668

36703669
case RAM_SAVE_FLAG_PAGE:
36713670
all_zero = false;
3672-
if (!place_needed || !matching_page_sizes) {
3671+
if (!matches_target_page_size) {
3672+
/* For huge pages, we always use a temporary buffer */
36733673
qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
36743674
} else {
3675-
/* Avoids the qemu_file copy during postcopy, which is
3676-
* going to do a copy later; can only do it when we
3677-
* do this read in one go (matching page sizes)
3675+
/*
3676+
* For small pages that match the target page size, we
3677+
* avoid the qemu_file copy. Instead we directly use
3678+
* the buffer of QEMUFile to place the page. Note: we
3679+
* cannot do any QEMUFile operation before using that
3680+
* buffer to make sure the buffer is valid when
3681+
* placing the page.
36783682
*/
36793683
qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
36803684
TARGET_PAGE_SIZE);
@@ -3940,7 +3944,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
39403944
int ret = -EINVAL;
39413945
QEMUFile *file = s->rp_state.from_dst_file;
39423946
unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3943-
uint64_t local_size = nbits / 8;
3947+
uint64_t local_size = DIV_ROUND_UP(nbits, 8);
39443948
uint64_t size, end_mark;
39453949

39463950
trace_ram_dirty_bitmap_reload_begin(block->idstr);

migration/ram.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ int multifd_save_cleanup(Error **errp);
4646
int multifd_load_setup(void);
4747
int multifd_load_cleanup(Error **errp);
4848
bool multifd_recv_all_channels_created(void);
49-
void multifd_recv_new_channel(QIOChannel *ioc);
49+
bool multifd_recv_new_channel(QIOChannel *ioc);
5050

5151
uint64_t ram_pagesize_summary(void);
5252
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);

migration/savevm.c

+10-14
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ enum qemu_vm_cmd {
8181
MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
8282
were previously sent during
8383
precopy but are dirty. */
84-
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
8584
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
85+
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
8686
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
8787
MIG_CMD_MAX
8888
};
@@ -2194,9 +2194,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
21942194
/* Clear the triggered bit to allow one recovery */
21952195
mis->postcopy_recover_triggered = false;
21962196

2197-
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2198-
MIGRATION_STATUS_POSTCOPY_PAUSED);
2199-
22002197
assert(mis->from_src_file);
22012198
qemu_file_shutdown(mis->from_src_file);
22022199
qemu_fclose(mis->from_src_file);
@@ -2209,6 +2206,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
22092206
mis->to_src_file = NULL;
22102207
qemu_mutex_unlock(&mis->rp_mutex);
22112208

2209+
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2210+
MIGRATION_STATUS_POSTCOPY_PAUSED);
2211+
22122212
/* Notify the fault thread for the invalidated file handle */
22132213
postcopy_fault_thread_notify(mis);
22142214

@@ -2276,18 +2276,14 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
22762276
qemu_file_set_error(f, ret);
22772277

22782278
/*
2279-
* Detect whether it is:
2280-
*
2281-
* 1. postcopy running (after receiving all device data, which
2282-
* must be in POSTCOPY_INCOMING_RUNNING state. Note that
2283-
* POSTCOPY_INCOMING_LISTENING is still not enough, it's
2284-
* still receiving device states).
2285-
* 2. network failure (-EIO)
2286-
*
2287-
* If so, we try to wait for a recovery.
2279+
* If we are in the middle of an active postcopy, then we pause instead
2280+
* of bailing out, to at least keep the VM's dirty data. Note
2281+
* that POSTCOPY_INCOMING_LISTENING stage is still not enough,
2282+
* during which we're still receiving device states and we
2283+
* still haven't yet started the VM on destination.
22882284
*/
22892285
if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
2290-
ret == -EIO && postcopy_pause_incoming(mis)) {
2286+
postcopy_pause_incoming(mis)) {
22912287
/* Reset f to point to the newly created channel */
22922288
f = mis->from_src_file;
22932289
goto retry;

migration/socket.c

-5
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,7 @@ static void socket_accept_incoming_migration(QIONetListener *listener,
168168
if (migration_has_all_channels()) {
169169
/* Close listening socket as it's no longer needed */
170170
qio_net_listener_disconnect(listener);
171-
172171
object_unref(OBJECT(listener));
173-
174-
if (!migrate_use_multifd()) {
175-
migration_incoming_process();
176-
}
177172
}
178173
}
179174

0 commit comments

Comments
 (0)