diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 4b048084c92..d5e5329ece2 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -70,6 +70,7 @@ use nexus_db_model::ExternalIp; use nexus_db_model::HwBaseboardId; use nexus_db_model::Image; use nexus_db_model::Instance; +use nexus_db_model::InstanceIntendedState; use nexus_db_model::InvCollection; use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvPhysicalDisk; @@ -4351,6 +4352,7 @@ async fn cmd_db_instance_info( const BOOT_DISK: &'static str = "boot disk"; const AUTO_RESTART: &'static str = "auto-restart"; const STATE: &'static str = "nexus state"; + const INTENDED_STATE: &'static str = "intended state"; const LAST_MODIFIED: &'static str = "last modified at"; const LAST_UPDATED: &'static str = "last updated at"; const LAST_AUTO_RESTART: &'static str = " last reincarnated at"; @@ -4376,6 +4378,7 @@ async fn cmd_db_instance_info( AUTO_RESTART, STATE, API_STATE, + INTENDED_STATE, LAST_UPDATED, LAST_MODIFIED, LAST_AUTO_RESTART, @@ -4435,6 +4438,7 @@ async fn cmd_db_instance_info( "{} {API_STATE:>WIDTH$}: {effective_state:?}", if effective_state == InstanceState::Failed { "/!\\" } else { "(i)" } ); + println!(" {INTENDED_STATE:>WIDTH$}: {}", instance.intended_state); println!( " {LAST_UPDATED:>WIDTH$}: {time_updated:?} (generation {})", r#gen.0 @@ -4442,9 +4446,7 @@ async fn cmd_db_instance_info( // Reincarnation status let InstanceKarmicStatus { needs_reincarnation, can_reincarnate } = - instance - .auto_restart - .status(&instance.runtime_state, active_vmm.as_ref()); + instance.auto_restart_status(active_vmm.as_ref()); println!( "{} {NEEDS_REINCARNATION:>WIDTH$}: {needs_reincarnation}", if needs_reincarnation { "(i)" } else { " " } @@ -4790,6 +4792,7 @@ struct VmmStateRow { struct CustomerInstanceRow { id: String, state: String, + intent: InstanceIntendedState, propolis_id: MaybePropolisId, sled_id: MaybeSledId, host_serial: String, @@ -4867,6 +4870,7 @@ async fn cmd_db_instances( id: i.instance().id().to_string(), name: i.instance().name().to_string(), state: i.effective_state().to_string(), + intent: i.instance().intended_state, propolis_id: (&i).into(), sled_id: (&i).into(), host_serial, diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 201c4be2f49..0ef02465b0a 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -52,7 +52,7 @@ EXECUTING COMMAND: omdb ["db", "instances"] termination: Exited(0) --------------------------------------------- stdout: -ID STATE PROPOLIS_ID SLED_ID HOST_SERIAL NAME +ID STATE INTENT PROPOLIS_ID SLED_ID HOST_SERIAL NAME --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index a29a9ec17d3..56e3e80e53e 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use super::InstanceIntendedState as IntendedState; use super::{ ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy, InstanceCpuCount, InstanceState, Vmm, VmmState, @@ -70,6 +71,15 @@ pub struct Instance { #[diesel(embed)] pub runtime_state: InstanceRuntimeState, + /// The most recently requested state of the instance. + /// + /// This is used to determine whether the instance should be automatically + /// restarted when it fails. Instances that were requested to stop should + /// not be automatically restarted if they have transitioned to the `Failed` + /// state. + #[diesel(column_name = intended_state)] + pub intended_state: IntendedState, + /// A UUID identifying the saga currently holding the update lock on this /// instance. If this is [`None`] the instance is not locked. Otherwise, if /// this is [`Some`], the instance is locked by the saga owning this UUID. @@ -112,6 +122,12 @@ impl Instance { cooldown: None, }; + let intended_state = if params.start { + IntendedState::Running + } else { + IntendedState::Stopped + }; + Self { identity, project_id, @@ -125,6 +141,7 @@ impl Instance { boot_disk_id: None, runtime_state, + intended_state, updater_gen: Generation::new(), updater_id: None, @@ -134,6 +151,47 @@ impl Instance { pub fn runtime(&self) -> &InstanceRuntimeState { &self.runtime_state } + + /// Returns an instance's karmic status. + pub fn auto_restart_status( + &self, + active_vmm: Option<&Vmm>, + ) -> InstanceKarmicStatus { + let state = &self.runtime_state; + // Instances only need to be automatically restarted if they are in the + // `Failed` state, or if their active VMM is in the `SagaUnwound` state. + // + // If the instance's intended state is not Running, it should + // not be reincarnated, even if it has failed. + let needs_reincarnation = + match (self.intended_state, state.nexus_state, active_vmm) { + (IntendedState::Running, InstanceState::Failed, _vmm) => { + debug_assert!( + _vmm.is_none(), + "a Failed instance will never have an active VMM!" + ); + true + } + (IntendedState::Running, InstanceState::Vmm, Some(ref vmm)) => { + debug_assert_eq!( + state.propolis_id, + Some(vmm.id), + "don't call `Instance::auto_restart_status` with a VMM \ + that isn't this instance's active VMM!?!?" + ); + // Note that we *don't* reincarnate instances with `Failed` active + // VMMs; in that case, an instance-update saga must first run to + // move the *instance* record to the `Failed` state. + vmm.runtime.state == VmmState::SagaUnwound + } + _ => false, + }; + + InstanceKarmicStatus { + needs_reincarnation, + can_reincarnate: self.auto_restart.can_reincarnate(&state), + } + } } impl DatastoreAttachTargetConfig for Instance { @@ -320,43 +378,6 @@ impl InstanceAutoRestart { pub const DEFAULT_POLICY: InstanceAutoRestartPolicy = InstanceAutoRestartPolicy::BestEffort; - /// Returns an instance's karmic status. - pub fn status( - &self, - state: &InstanceRuntimeState, - active_vmm: Option<&Vmm>, - ) -> InstanceKarmicStatus { - // Instances only need to be automatically restarted if they are in the - // `Failed` state, or if their active VMM is in the `SagaUnwound` state. - let needs_reincarnation = match (state.nexus_state, active_vmm) { - (InstanceState::Failed, _vmm) => { - debug_assert!( - _vmm.is_none(), - "a Failed instance will never have an active VMM!" - ); - true - } - (InstanceState::Vmm, Some(ref vmm)) => { - debug_assert_eq!( - state.propolis_id, - Some(vmm.id), - "don't call `InstanceAutoRestart::status with a VMM \ - that isn't this instance's active VMM!?!?" - ); - // Note that we *don't* reincarnate instances with `Failed` active - // VMMs; in that case, an instance-update saga must first run to - // move the *instance* record to the `Failed` state. - vmm.runtime.state == VmmState::SagaUnwound - } - _ => false, - }; - - InstanceKarmicStatus { - needs_reincarnation, - can_reincarnate: self.can_reincarnate(&state), - } - } - /// Returns whether or not this auto-restart configuration will permit an /// instance with the provided `InstanceRuntimeState` to reincarnate. /// @@ -425,10 +446,13 @@ impl InstanceAutoRestart { // If the auto-restart policy is null, then it should // default to "best effort". .or(dsl::auto_restart_policy.is_null())) - // An instance whose last reincarnation was within the cooldown - // interval from now must remain in _bardo_ --- the liminal - // state between death and rebirth --- before its next - // reincarnation. + // An instance should only be automatically restarted if it is intended + // to be in the `Running` state. Instances which were requested to stop + // should not be automatically restarted, even if they have failed. + .and(dsl::intended_state.eq(IntendedState::Running)) + // instance whose last reincarnation was within the cooldown interval + // from now must remain in _bardo_ --- the liminal state between death + // and rebirth --- before its next reincarnation. .and( // If the instance has never previously been reincarnated, then // it's allowed to reincarnate. diff --git a/nexus/db-model/src/instance_intended_state.rs b/nexus/db-model/src/instance_intended_state.rs new file mode 100644 index 00000000000..750df17edc0 --- /dev/null +++ b/nexus/db-model/src/instance_intended_state.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +impl_enum_type!( + InstanceIntendedStateEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, + )] + pub enum InstanceIntendedState; + + // Enum values + Running => b"running" + Stopped => b"stopped" + GuestShutdown => b"guest_shutdown" + Destroyed => b"destroyed" +); + +impl InstanceIntendedState { + pub fn as_str(&self) -> &'static str { + match self { + Self::Running => "running", + Self::Stopped => "stopped", + Self::GuestShutdown => "guest_shutdown", + Self::Destroyed => "destroyed", + } + } +} + +impl fmt::Display for InstanceIntendedState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.as_str()) + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index d316b76dc62..3627e8c395c 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -38,6 +38,7 @@ mod image; mod instance; mod instance_auto_restart_policy; mod instance_cpu_count; +mod instance_intended_state; mod instance_state; mod internet_gateway; mod inventory; @@ -168,6 +169,7 @@ pub use image::*; pub use instance::*; pub use instance_auto_restart_policy::*; pub use instance_cpu_count::*; +pub use instance_intended_state::*; pub use instance_state::*; pub use internet_gateway::*; pub use inventory::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index a70b0b3e753..d98bd3482f0 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(139, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(140, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(140, "instance-intended-state"), KnownVersion::new(139, "webhooks"), KnownVersion::new(138, "saga-abandoned-state"), KnownVersion::new(137, "oximeter-read-policy"), diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 235ff0d4e77..20cd5f627f4 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -21,6 +21,7 @@ use crate::db::model::Instance; use crate::db::model::InstanceAutoRestart; use crate::db::model::InstanceAutoRestartPolicy; use crate::db::model::InstanceCpuCount; +use crate::db::model::InstanceIntendedState; use crate::db::model::InstanceRuntimeState; use crate::db::model::InstanceState; use crate::db::model::InstanceUpdate; @@ -778,6 +779,47 @@ impl DataStore { Ok(updated) } + pub async fn instance_set_intended_state( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + intended_state: db::model::InstanceIntendedState, + ) -> Result { + use nexus_db_schema::schema::instance::dsl; + opctx.authorize(authz::Action::Modify, authz_instance).await?; + let instance_id = authz_instance.id().into_untyped_uuid(); + + let UpdateAndQueryResult { status, found } = + diesel::update(dsl::instance) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(instance_id)) + // Don't spuriously set time_modified if the intended state won't change. + .filter(dsl::intended_state.ne(intended_state)) + .set(( + dsl::intended_state.eq(intended_state), + dsl::time_modified.eq(chrono::Utc::now()), + )) + .check_if_exists::(instance_id) + .execute_and_check(&*self.pool_connection_unauthorized().await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_instance), + ) + })?; + + slog::info!( + opctx.log, + "set intended instance state"; + "instance_id" => ?instance_id, + "intended_state" => %intended_state, + "changed" => status == UpdateStatus::Updated, + ); + + Ok(found) + } + /// Updates an instance record by setting the instance's migration ID to the /// provided `migration_id` and the target VMM ID to the provided /// `target_propolis_id`, if the instance does not currently have an active @@ -1925,12 +1967,19 @@ impl DataStore { authz_instance: &authz::Instance, lock: &UpdaterLock, new_runtime: &InstanceRuntimeState, + new_intent: Option, ) -> Result { use nexus_db_schema::schema::instance::dsl; let instance_id = authz_instance.id(); let UpdaterLock { updater_id, locked_gen } = *lock; + #[derive(diesel::AsChangeset)] + #[diesel(table_name = nexus_db_schema::schema::instance)] + struct IntentUpdate { + intended_state: Option, + } + let result = diesel::update(dsl::instance) .filter(dsl::time_deleted.is_null()) .filter(dsl::id.eq(instance_id)) @@ -1946,6 +1995,7 @@ impl DataStore { dsl::updater_gen.eq(Generation(locked_gen.0.next())), dsl::updater_id.eq(None::), new_runtime.clone(), + IntentUpdate { intended_state: new_intent }, )) .check_if_exists::(instance_id) .execute_and_check(&*self.pool_connection_authorized(opctx).await?) @@ -2594,7 +2644,8 @@ mod tests { &opctx, &authz_instance, &lock, - &new_runtime + &new_runtime, + None, ) .await ) @@ -2608,7 +2659,8 @@ mod tests { &opctx, &authz_instance, &lock, - &new_runtime + &new_runtime, + None, ) .await ) @@ -2633,7 +2685,8 @@ mod tests { migration_id: Some(Uuid::new_v4()), dst_propolis_id: Some(Uuid::new_v4()), ..new_runtime.clone() - } + }, + None, ) .await ) @@ -2718,6 +2771,7 @@ mod tests { nexus_state: InstanceState::NoVmm, time_last_auto_restarted: None, }, + None, ) .await ) diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 04e8bcc1681..79d312c8b64 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -42,6 +42,7 @@ define_enums! { IdentityTypeEnum => "identity_type", InstanceAutoRestartPolicyEnum => "instance_auto_restart", InstanceStateEnum => "instance_state_v2", + InstanceIntendedStateEnum => "instance_intended_state", IpAttachStateEnum => "ip_attach_state", IpKindEnum => "ip_kind", IpPoolResourceTypeEnum => "ip_pool_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index a8ae6179baa..ba699b579ac 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -431,6 +431,7 @@ table! { migration_id -> Nullable, state -> crate::enums::InstanceStateEnum, time_last_auto_restarted -> Nullable, + intended_state -> crate::enums::InstanceIntendedStateEnum, updater_id -> Nullable, updater_gen-> Int8, } diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index 68c411d6e74..7d453a36cad 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -309,6 +309,7 @@ mod test { use chrono::Utc; use nexus_db_lookup::LookupPath; use nexus_db_model::Generation; + use nexus_db_model::InstanceIntendedState; use nexus_db_model::InstanceRuntimeState; use nexus_db_model::InstanceState; use nexus_db_model::Vmm; @@ -354,6 +355,7 @@ mod test { name: &str, auto_restart: impl Into>, state: InstanceState, + intent: InstanceIntendedState, ) -> authz::Instance { let auto_restart_policy = auto_restart.into(); let instances_url = format!("/v1/instances?project={}", PROJECT_NAME); @@ -393,23 +395,23 @@ mod test { .await; let id = InstanceUuid::from_untyped_uuid(instance.identity.id); - let authz_instance = if state != InstanceState::Vmm - && state != InstanceState::NoVmm - { - put_instance_in_state(cptestctx, opctx, id, state).await - } else { - let datastore = cptestctx.server.server_context().nexus.datastore(); - let (_, _, authz_instance) = LookupPath::new(&opctx, datastore) - .instance_id(id.into_untyped_uuid()) - .lookup_for(authz::Action::Modify) - .await - .expect("instance must exist"); - authz_instance + let datastore = cptestctx.server.server_context().nexus.datastore(); + let (_, _, authz_instance) = LookupPath::new(&opctx, datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(authz::Action::Modify) + .await + .expect("instance must exist"); + if state != InstanceState::Vmm && state != InstanceState::NoVmm { + put_instance_in_state(cptestctx, opctx, id, state).await; }; + datastore + .instance_set_intended_state(opctx, &authz_instance, intent) + .await + .unwrap(); eprintln!( "instance {id}: auto_restart_policy={auto_restart_policy:?}; \ - state={state:?}" + state={state:?}; intent={intent}" ); authz_instance } @@ -567,6 +569,7 @@ mod test { "my-cool-instance", InstanceAutoRestartPolicy::BestEffort, InstanceState::Failed, + InstanceIntendedState::Running, ) .await; @@ -612,6 +615,7 @@ mod test { "my-cool-instance", None, InstanceState::Failed, + InstanceIntendedState::Running, ) .await; @@ -660,6 +664,7 @@ mod test { &format!("sotapanna-{i}"), InstanceAutoRestartPolicy::BestEffort, InstanceState::Failed, + InstanceIntendedState::Running, ) .await; will_reincarnate.insert(failed(instance.id())); @@ -675,6 +680,7 @@ mod test { &format!("sadakagami-{i}"), InstanceAutoRestartPolicy::BestEffort, InstanceState::NoVmm, + InstanceIntendedState::Running, ) .await; // Now, give the instance an active VMM which is SagaUnwound. @@ -696,6 +702,7 @@ mod test { &format!("arahant-{i}"), InstanceAutoRestartPolicy::Never, InstanceState::Failed, + InstanceIntendedState::Running, ) .await; @@ -711,6 +718,7 @@ mod test { &format!("arahant-{i}"), InstanceAutoRestartPolicy::Never, InstanceState::NoVmm, + InstanceIntendedState::Running, ) .await; @@ -731,11 +739,36 @@ mod test { &format!("anagami-{i}"), InstanceAutoRestartPolicy::BestEffort, state, + InstanceIntendedState::Running, ) .await; will_not_reincarnate.insert(instance.id()); } + // Some `Failed` instances with policies permitting them to be + // reincarnated, but whose intended state is not `Running`. + let failed_stopped = create_instance( + &cptestctx, + &opctx, + "anagami-4", + InstanceAutoRestartPolicy::BestEffort, + InstanceState::Failed, + InstanceIntendedState::Stopped, + ) + .await; + will_not_reincarnate.insert(failed_stopped.id()); + let unwound_stopped = create_instance( + &cptestctx, + &opctx, + "anagami-5", + InstanceAutoRestartPolicy::BestEffort, + InstanceState::NoVmm, + InstanceIntendedState::Stopped, + ) + .await; + attach_saga_unwound_vmm(&cptestctx, &opctx, &unwound_stopped).await; + will_not_reincarnate.insert(unwound_stopped.id()); + // Activate the task again, and check that our instance had an // instance-start saga started. let status = assert_activation_ok!(task.activate(&opctx).await); @@ -806,6 +839,7 @@ mod test { "victor", InstanceAutoRestartPolicy::BestEffort, InstanceState::Failed, + InstanceIntendedState::Running, ) .await; let instance1_id = InstanceUuid::from_untyped_uuid(instance1.id()); @@ -829,6 +863,7 @@ mod test { "frankenstein", InstanceAutoRestartPolicy::BestEffort, InstanceState::Vmm, + InstanceIntendedState::Running, ) .await; let instance2_id = InstanceUuid::from_untyped_uuid(instance2.id()); diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 7bff48d2877..c2dbf6269da 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -20,6 +20,7 @@ use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; use nexus_db_lookup::LookupPath; use nexus_db_lookup::lookup; +use nexus_db_model::InstanceIntendedState as IntendedState; use nexus_db_model::InstanceUpdate; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; @@ -584,6 +585,14 @@ impl super::Nexus { let (.., authz_instance, instance) = instance_lookup.fetch_for(authz::Action::Delete).await?; + self.db_datastore + .instance_set_intended_state( + opctx, + &authz_instance, + IntendedState::Destroyed, + ) + .await?; + // TODO: #3593 Correctness // When the set of boundary switches changes, there is no cleanup / // reconciliation logic performed to ensure that the NAT entries are @@ -674,10 +683,18 @@ impl super::Nexus { let (.., authz_instance) = instance_lookup.lookup_for(authz::Action::Modify).await?; - let state = self + let mut state = self .db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await?; + state.instance = self + .db_datastore + .instance_set_intended_state( + opctx, + &authz_instance, + IntendedState::Running, + ) + .await?; if let Err(e) = self .instance_request_state( @@ -717,10 +734,18 @@ impl super::Nexus { let (.., authz_instance) = instance_lookup.lookup_for(authz::Action::Modify).await?; - let state = self + let mut state = self .db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await?; + state.instance = self + .db_datastore + .instance_set_intended_state( + opctx, + &authz_instance, + IntendedState::Running, + ) + .await?; match instance_start_allowed(&self.log, &state, reason)? { InstanceStartDisposition::AlreadyStarted => Ok(state), @@ -754,10 +779,18 @@ impl super::Nexus { let (.., authz_instance) = instance_lookup.lookup_for(authz::Action::Modify).await?; - let state = self + let mut state = self .db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await?; + state.instance = self + .db_datastore + .instance_set_intended_state( + opctx, + &authz_instance, + IntendedState::Stopped, + ) + .await?; if let Err(e) = self .instance_request_state( diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 3dc1465e08f..5605ef688d8 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -349,6 +349,7 @@ use crate::app::db::datastore::VmmStateUpdateResult; use crate::app::db::datastore::instance; use crate::app::db::model::ByteCount; use crate::app::db::model::Generation; +use crate::app::db::model::InstanceIntendedState; use crate::app::db::model::InstanceRuntimeState; use crate::app::db::model::InstanceState; use crate::app::db::model::MigrationState; @@ -412,8 +413,8 @@ pub fn update_saga_needed( ) -> bool { // Currently, an instance-update saga is required if (and only if): // - // - The instance's active VMM has transitioned to `Destroyed` or `Failed`. We don't - // actually know whether the VMM whose state was updated here was the + // - The instance's active VMM has transitioned to `Destroyed` or `Failed`. + // We don't actually know whether the VMM whose state was updated here was the // active VMM or not, so we will always attempt to run an instance-update // saga if the VMM was `Destroyed` (or `Failed`). let vmm_needs_update = @@ -465,6 +466,9 @@ struct UpdatesRequired { /// saga completes. new_runtime: InstanceRuntimeState, + /// A new intended state should be written to the instance record. + new_intent: Option, + /// If this is [`Some`], the instance's active VMM with this UUID has /// transitioned to [`VmmState::Destroyed`], and its resources must be /// cleaned up by a [`destroyed`] subsaga. @@ -510,6 +514,7 @@ impl UpdatesRequired { let mut new_runtime = snapshot.instance.runtime().clone(); new_runtime.gen = Generation(new_runtime.gen.next()); new_runtime.time_updated = Utc::now(); + let mut new_intent = None; let instance_id = snapshot.instance.id(); let mut update_required = false; @@ -532,7 +537,24 @@ impl UpdatesRequired { // instance's new state to `Failed` rather than to `NoVmm`. if active_vmm.runtime.state == VmmState::Failed { active_vmm_failed = true; + } else if snapshot.instance.intended_state + == InstanceIntendedState::Running + && snapshot.migration.is_none() + { + // Did the active VMM shut itself down, when it was + // intended to be running *and* we were not migrating + // out? If so, update the instance's intended state to + // indicate that it stopped itself. + new_intent = Some(InstanceIntendedState::GuestShutdown); + info!( + log, + "instance update (active VMM destroyed): guest shut \ + itself down"; + "instance_id" => %instance_id, + "propolis_id" => %active_vmm.id, + ); } + Some(id) } else { None @@ -710,6 +732,7 @@ impl UpdatesRequired { } Some(Self { + new_intent, new_runtime, destroy_active_vmm, destroy_target_vmm, @@ -1159,6 +1182,7 @@ async fn siu_commit_instance_updates( "instance update: committing new runtime state and unlocking..."; "instance_id" => %instance_id, "new_runtime" => ?update.new_runtime, + "new_intent" => ?update.new_intent, "lock" => ?lock, ); @@ -1169,6 +1193,7 @@ async fn siu_commit_instance_updates( &authz_instance, &lock, &update.new_runtime, + update.new_intent, ) .await .map_err(ActionError::action_failed)?; @@ -1287,10 +1312,9 @@ async fn siu_chain_successor_saga( // auto-restart policy allows it to be automatically restarted. If // it does, activate the instance-reincarnation background task to // automatically restart it. - let karmic_state = new_state.instance.auto_restart.status( - &new_state.instance.runtime_state, - new_state.active_vmm.as_ref(), - ); + let karmic_state = new_state + .instance + .auto_restart_status(new_state.active_vmm.as_ref()); if karmic_state.should_reincarnate() { info!( log, @@ -1300,6 +1324,7 @@ async fn siu_chain_successor_saga( "instance_id" => %instance_id, "auto_restart_config" => ?new_state.instance.auto_restart, "runtime_state" => ?new_state.instance.runtime_state, + "intended_state" => %new_state.instance.intended_state, ); nexus.background_tasks.task_instance_reincarnation.activate(); } else { @@ -1310,6 +1335,7 @@ async fn siu_chain_successor_saga( "auto_restart_config" => ?new_state.instance.auto_restart, "needs_reincarnation" => karmic_state.needs_reincarnation, "karmic_state" => ?karmic_state.can_reincarnate, + "intended_state" => %new_state.instance.intended_state, ) } } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index b6c27ebfed9..b8c5651d5c0 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -1072,7 +1072,7 @@ async fn test_instance_migrate_v2p_and_routes( // Verifies that if a request to reboot or stop an instance fails because of a // 404 error from sled agent, then the instance moves to the Failed state, and -// can be restarted once it has transitioned to that state.. +// can be restarted once it has transitioned to that state. #[nexus_test] async fn test_instance_failed_after_sled_agent_forgets_vmm_can_be_restarted( cptestctx: &ControlPlaneTestContext, @@ -1423,6 +1423,120 @@ async fn test_instance_failed_by_instance_watcher_automatically_reincarnates( ); } +// Verifies that if an instance transitions to `Failed` due to a failed request +// to stop it, it will not automatically restart, even if it is configured to +// restart it automatically on failure. +#[nexus_test] +async fn test_instance_failed_by_stop_request_does_not_reincarnate( + cptestctx: &ControlPlaneTestContext, +) { + let apictx = &cptestctx.server.server_context(); + let nexus = &apictx.nexus; + let client = &cptestctx.external_client; + let instance_name = "resurgam"; + let instance_url = get_instance_url(instance_name); + let instance_id = dbg!( + make_forgotten_instance( + &cptestctx, + instance_name, + InstanceAutoRestartPolicy::BestEffort, + ) + .await + ); + + // Attempting to stop the forgotten instance will result in a 404 + // NO_SUCH_INSTANCE from the sled-agent, which Nexus turns into a 503. + expect_instance_stop_fail( + client, + instance_name, + http::StatusCode::SERVICE_UNAVAILABLE, + ) + .await; + + // Wait for the instance to transition to Failed. + dbg!( + instance_wait_for_state(client, instance_id, InstanceState::Failed) + .await + ); + + // Activate the reincarnation task. + dbg!( + nexus_test_utils::background::activate_background_task( + &cptestctx.internal_client, + "instance_reincarnation", + ) + .await + ); + + // Unfortunately there isn't really a great way to assert "no start saga has + // been started", so we'll do the somewhat jankier thing of waiting a bit + // and making sure that the instance doesn't transition to Starting. + for secs in 0..30 { + let state = instance_get(client, &instance_url).await; + assert_eq!( + state.runtime.run_state, + InstanceState::Failed, + "instance transitioned out of Failed (to {}) after {secs} \ + seconds! state: {:#?}", + state.runtime.run_state, + state + ); + assert_eq!(state.runtime.time_last_auto_restarted, None); + tokio::time::sleep(Duration::from_secs(1)).await; + } + + // Okay, now let's try restarting the instance, failing it, and then + // asserting it *does* reincarnate. + dbg!(expect_instance_start_ok(client, instance_name).await); + instance_simulate(nexus, &instance_id).await; + let instance_next = instance_get(&client, &instance_url).await; + assert_eq!(instance_next.runtime.run_state, InstanceState::Running); + + // Forcibly unregister the instance from the sled-agent without telling + // Nexus. It will now behave as though it has forgotten the instance and + // return a 404 error with the "NO_SUCH_INSTANCE" error code + let vmm_id = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("running instance must be on a sled") + .propolis_id; + let sled_agent = cptestctx.first_sled_agent(); + sled_agent + .instance_unregister(vmm_id) + .await + .expect("instance_unregister must succeed"); + + // Attempting to reboot the instance will fail. + expect_instance_reboot_fail( + client, + instance_name, + http::StatusCode::SERVICE_UNAVAILABLE, + ) + .await; + + // Wait for the instance to transition to Failed. + dbg!( + instance_wait_for_state(client, instance_id, InstanceState::Failed) + .await + ); + + // Activate the reincarnation task. + dbg!( + nexus_test_utils::background::activate_background_task( + &cptestctx.internal_client, + "instance_reincarnation", + ) + .await + ); + + // This time, it should come back. + dbg!( + instance_wait_for_state(client, instance_id, InstanceState::Starting) + .await + ); +} + #[nexus_test] async fn test_instances_are_not_marked_failed_on_other_sled_agent_errors( cptestctx: &ControlPlaneTestContext, @@ -1581,6 +1695,22 @@ async fn expect_instance_reboot_fail( .expect("expected instance reboot to fail"); } +async fn expect_instance_stop_fail( + client: &ClientTestContext, + instance_name: &str, + status: http::StatusCode, +) { + let url = get_instance_url(format!("{instance_name}/stop").as_str()); + let builder = RequestBuilder::new(client, Method::POST, &url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(status)); + NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("expected instance stop to fail"); +} + async fn expect_instance_reboot_ok( client: &ClientTestContext, instance_name: &str, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 03570ec4f3e..d0b0e28004f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1151,6 +1151,26 @@ CREATE TYPE IF NOT EXISTS omicron.public.instance_auto_restart AS ENUM ( 'best_effort' ); +/* + * Represents the *desired* state of an instance, as requested by the user. +*/ +CREATE TYPE IF NOT EXISTS omicron.public.instance_intended_state AS ENUM ( + /* The instance should be running. */ + 'running', + + /* The instance was asked to stop by an API request. */ + 'stopped', + + /* The guest OS shut down the virtual machine. + * + * This is distinct from the 'stopped' intent, which represents a stop + * requested by the API. + */ + 'guest_shutdown', + + /* The instance should be destroyed. */ + 'destroyed' +); /* * TODO consider how we want to manage multiple sagas operating on the same @@ -1235,6 +1255,14 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( */ boot_disk_id UUID, + /* + * The intended state of the instance, as requested by the user. + * + * This may differ from its current state, and is used to determine what + * action should be taken when the instance's VMM state changes. + */ + intended_state omicron.public.instance_intended_state NOT NULL, + CONSTRAINT vmm_iff_active_propolis CHECK ( ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR ((state != 'vmm') AND (active_propolis_id IS NULL)) @@ -5476,7 +5504,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '139.0.0', NULL) + (TRUE, NOW(), NOW(), '140.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/instance-intended-state/up01.sql b/schema/crdb/instance-intended-state/up01.sql new file mode 100644 index 00000000000..9295f8ebd97 --- /dev/null +++ b/schema/crdb/instance-intended-state/up01.sql @@ -0,0 +1,20 @@ +/* + * Represents the *desired* state of an instance, as requested by the user. +*/ +CREATE TYPE IF NOT EXISTS omicron.public.instance_intended_state AS ENUM ( + /* The instance should be running. */ + 'running', + + /* The instance was asked to stop by an API request. */ + 'stopped', + + /* The guest OS shut down the virtual machine. + * + * This is distinct from the 'stopped' intent, which represents a stop + * requested by the API. + */ + 'guest_shutdown', + + /* The instance should be destroyed. */ + 'destroyed' +); diff --git a/schema/crdb/instance-intended-state/up02.sql b/schema/crdb/instance-intended-state/up02.sql new file mode 100644 index 00000000000..4a04602e51e --- /dev/null +++ b/schema/crdb/instance-intended-state/up02.sql @@ -0,0 +1,4 @@ +-- create the column. it's nullable for now, as we must backfill the intended +-- states of existing instances. +ALTER TABLE omicron.public.instance + ADD COLUMN IF NOT EXISTS intended_state omicron.public.instance_intended_state; diff --git a/schema/crdb/instance-intended-state/up03.sql b/schema/crdb/instance-intended-state/up03.sql new file mode 100644 index 00000000000..de4990d5560 --- /dev/null +++ b/schema/crdb/instance-intended-state/up03.sql @@ -0,0 +1,12 @@ +-- backfill instance intended states based on the current state of the instance. +SET LOCAL disallow_full_table_scans = off; +UPDATE instance SET intended_state = CASE + WHEN instance.state = 'destroyed' THEN 'destroyed' + WHEN instance.state = 'vmm' AND vmm.state = 'stopped' THEN 'stopped' + WHEN instance.state = 'vmm' AND vmm.state = 'stopping' THEN 'stopped' + WHEN instance.state = 'vmm' THEN 'running' + ELSE 'stopped' +END +FROM omicron.public.instance instance + LEFT JOIN omicron.public.vmm + ON instance.vmm.active_propolis_id = vmm.id; diff --git a/schema/crdb/instance-intended-state/up04.sql b/schema/crdb/instance-intended-state/up04.sql new file mode 100644 index 00000000000..0f091055554 --- /dev/null +++ b/schema/crdb/instance-intended-state/up04.sql @@ -0,0 +1,4 @@ +-- now make the new column non-nullable +ALTER TABLE omicron.public.instance +ALTER COLUMN intended_state +SET NOT NULL;