
Commit 0b64a31

committed: ci-test
1 parent 73633a8 commit 0b64a31

File tree: 9 files changed, +170 -44 lines changed


dagster-cloud-cli/dagster_cloud_cli/commands/ci/__init__.py

Lines changed: 92 additions & 6 deletions
@@ -8,7 +8,7 @@
 import sys
 from collections import Counter
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast

 import typer
 from typer import Typer
@@ -28,11 +28,10 @@
     get_org_url,
 )
 from dagster_cloud_cli.core import pex_builder, pydantic_yaml
-
-from .. import metrics
-from . import checks, report, state
-
-app = Typer(hidden=True, help="CI/CD agnostic commands")
+from dagster_cloud_cli.core.artifacts import (
+    download_organization_artifact,
+    upload_organization_artifact,
+)
 from dagster_cloud_cli.core.pex_builder import (
     code_location,
     deps,
@@ -42,6 +41,11 @@
 )
 from dagster_cloud_cli.types import CliEventTags, CliEventType

+from .. import metrics
+from . import checks, report, state
+
+app = Typer(hidden=True, help="CI/CD agnostic commands")
+

 @app.command(help="Print json information about current CI/CD environment")
 def inspect(project_dir: str):
@@ -204,6 +208,10 @@ def init(
     status_url: Optional[str] = None,
 ):
     yaml_path = pathlib.Path(project_dir) / dagster_cloud_yaml_path
+    if not yaml_path.exists():
+        raise ui.error(
+            f"Dagster Cloud yaml file not found at specified path {yaml_path.resolve()}."
+        )
     locations_def = pydantic_yaml.load_dagster_cloud_yaml(yaml_path.read_text())
     locations = locations_def.locations
     if location_name:
@@ -217,6 +225,7 @@
     url = get_org_url(organization, dagster_env)
     # Deploy to the branch deployment for the current context. If there is no branch deployment
     # available (eg. if not in a PR) then we fallback to the --deployment flag.
+
     try:
         branch_deployment = get_deployment_from_context(url, project_dir)
         if deployment:
@@ -225,9 +234,11 @@
                 f" --deployment={deployment}"
             )
         deployment = branch_deployment
+        is_branch_deployment = True
     except ValueError as err:
         if deployment:
             ui.print(f"Deploying to {deployment}. No branch deployment ({err}).")
+            is_branch_deployment = False
         else:
             raise ui.error(
                 f"Cannot determine deployment name in current context ({err}). Please specify"
@@ -245,6 +256,7 @@
             deployment_name=deployment,
             location_file=str(yaml_path.absolute()),
             location_name=location.location_name,
+            is_branch_deployment=is_branch_deployment,
             build=state.BuildMetadata(
                 git_url=git_url, commit_hash=commit_hash, build_config=location.build
             ),
@@ -703,3 +715,77 @@ def _deploy(
         agent_heartbeat_timeout=agent_heartbeat_timeout,
         url=deployment_url,
     )
+
+
+dagster_dbt_app = typer.Typer(
+    hidden=True,
+    help="Dagster Cloud commands for managing the `dagster-dbt` integration.",
+    add_completion=False,
+)
+app.add_typer(dagster_dbt_app, name="dagster-dbt", no_args_is_help=True)
+
+project_app = typer.Typer(
+    name="project",
+    no_args_is_help=True,
+    help="Commands for using a dbt project in Dagster.",
+    add_completion=False,
+)
+dagster_dbt_app.add_typer(project_app, name="project", no_args_is_help=True)
+
+
+@project_app.command(
+    name="manage-state",
+    help="""
+    This CLI command will handle uploading and downloading artifacts if `state_dir` is specified on
+    `DbtProject`.
+    """,
+)
+def manage_state_command(
+    statedir: str = STATEDIR_OPTION,
+    file: str = typer.Option(),
+    source_deployment: str = typer.Option(
+        default="prod",
+        help="Which deployment should upload its manifest.json.",
+    ),
+    key_prefix: str = typer.Option(
+        default="",
+        help="A key prefix for the key the manifest.json is saved with.",
+    ),
+):
+    try:
+        from dagster_dbt import DbtProject
+    except:
+        ui.print(
+            "Unable to import dagster_dbt, can not use dbt-prepare-for-deployment when dagster_dbt is not installed."
+        )
+        return
+    from dagster._core.code_pointer import load_python_file
+    from dagster._core.definitions.load_assets_from_modules import find_objects_in_module_of_types

+    state_store = state.FileStore(statedir=statedir)
+    locations = state_store.list_locations()
+    if not locations:
+        raise ui.error("Unable to determine deployment state.")
+
+    location = locations[0]
+    deployment_name = location.deployment_name
+    is_branch = location.is_branch_deployment
+
+    contents = load_python_file(file, None)
+    for project in find_objects_in_module_of_types(contents, DbtProject):
+        project = cast(DbtProject, project)
+        if project.state_path:
+            download_path = project.state_path.joinpath("manifest.json")
+            key = f"{key_prefix}{os.fspath(download_path)}"
+            if is_branch:
+                ui.print(f"Downloading {source_deployment} manifest for branch deployment.")
+                os.makedirs(project.state_path, exist_ok=True)
+                download_organization_artifact(key, download_path)
+                ui.print("Download complete.")
+
+            elif deployment_name == source_deployment:
+                ui.print(f"Uploading {source_deployment} manifest.")
+                upload_organization_artifact(key, project.manifest_path)
+                ui.print("Upload complete")
+
+    ui.print("Project ready")
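The new manage-state command loads the module passed via --file and scans it for DbtProject objects. A minimal sketch of such a module, under the assumption that DbtProject from dagster_dbt accepts project_dir and state_path as shown (the file name, directory layout, and state_path value here are illustrative, not part of this commit):

# dbt_definitions.py -- hypothetical module passed via --file
from pathlib import Path

from dagster_dbt import DbtProject

# A project with state_path set participates in the manifest upload/download
# handled by manage-state; projects without it are skipped by the loop above.
my_project = DbtProject(
    project_dir=Path(__file__).parent / "dbt_project",
    state_path="state",  # illustrative; resolved against the project directory
)

Given the Typer wiring above, the invocation shape would be something like `dagster-cloud ci dagster-dbt project manage-state --statedir <dir> --file dbt_definitions.py` (inferred from the command registration, not documented in this commit).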

dagster-cloud-cli/dagster_cloud_cli/commands/ci/state.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ class LocationState(BaseModel, extra=Extra.forbid):
     deployment_name: str
     location_file: str
     location_name: str
+    is_branch_deployment: bool
     selected: bool = True
     build: BuildMetadata
     build_output: Optional[Union[DockerBuildOutput, PexBuildOutput]] = Field(
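Since LocationState forbids extra keys and the new field has no default, state serialized by an older CLI would presumably fail validation when re-read. A standalone sketch of that Pydantic behavior (a toy model, not the real class):

from pydantic import BaseModel, Extra, ValidationError

class LocationStateSketch(BaseModel, extra=Extra.forbid):
    location_name: str
    is_branch_deployment: bool  # new required field

try:
    # Old state files lack the new key, so parsing raises.
    LocationStateSketch.parse_obj({"location_name": "my_location"})
except ValidationError as err:
    print(err)  # reports: field required (is_branch_deployment)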
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "1.7.0"
+__version__ = "1!0+dev"
Lines changed: 4 additions & 1 deletion

@@ -1,4 +1,7 @@
 from .defs import (
     build_anomaly_detection_freshness_checks as build_anomaly_detection_freshness_checks,
 )
-from .types import AnomalyDetectionModelParams as AnomalyDetectionModelParams
+from .types import (
+    AnomalyDetectionModelParams as AnomalyDetectionModelParams,
+    BetaFreshnessAnomalyDetectionParams as BetaFreshnessAnomalyDetectionParams,
+)
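The effect of the widened re-export is that the beta params class becomes importable from the package root, e.g. (assuming the package path dagster_cloud.anomaly_detection, consistent with the neighboring defs.py):

from dagster_cloud.anomaly_detection import (
    AnomalyDetectionModelParams,
    BetaFreshnessAnomalyDetectionParams,
)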

dagster-cloud/dagster_cloud/anomaly_detection/defs.py

Lines changed: 13 additions & 13 deletions
@@ -47,7 +47,7 @@ def _build_check_for_assets(
     @multi_asset_check(
         specs=[
             AssetCheckSpec(
-                name="freshness_anomaly_detection_check",
+                name="anomaly_detection_freshness_check",
                 description=f"Detects anomalies in the freshness of the asset using model {params.model_version.value.lower()}.",
                 asset=asset_key,
             )
@@ -65,7 +65,9 @@ def the_check(context: AssetCheckExecutionContext) -> Iterable[AssetCheckResult]:
         )
         instance = cast(DagsterCloudAgentInstance, context.instance)
         with create_cloud_webserver_client(
-            instance.dagit_url,
+            instance.dagit_url[:-1]
+            if instance.dagit_url.endswith("/")
+            else instance.dagit_url,  # Remove trailing slash
             check.str_param(instance.dagster_cloud_agent_token, "dagster_cloud_agent_token"),
         ) as client:
             for check_key in context.selected_asset_check_keys:
@@ -82,16 +84,17 @@ def the_check(context: AssetCheckExecutionContext) -> Iterable[AssetCheckResult]:
                     },
                 },
             )
+            data = result["data"]["anomalyDetectionInference"]
             metadata = {
                 "model_params": {**params.as_metadata},
                 "model_version": params.model_version.value,
             }
-            if result["anomalyDetectionInference"]["__typename"] != "AnomalyDetectionSuccess":
+            if data["__typename"] != "AnomalyDetectionSuccess":
                 yield handle_anomaly_detection_inference_failure(
-                    result, metadata, params, asset_key
+                    data, metadata, params, asset_key
                 )
                 continue
-            response = result["anomalyDetectionInference"]["response"]
+            response = result["data"]["anomalyDetectionInference"]["response"]
             overdue_seconds = check.float_param(response["overdue_seconds"], "overdue_seconds")
             overdue_deadline_timestamp = response["overdue_deadline_timestamp"]
             metadata["overdue_deadline_timestamp"] = MetadataValue.timestamp(
@@ -148,25 +151,22 @@ def the_check(context: AssetCheckExecutionContext) -> Iterable[AssetCheckResult]:


 def handle_anomaly_detection_inference_failure(
-    result: dict, metadata: dict, params: AnomalyDetectionModelParams, asset_key: AssetKey
+    data: dict, metadata: dict, params: AnomalyDetectionModelParams, asset_key: AssetKey
 ) -> AssetCheckResult:
     if (
-        result["anomalyDetectionInference"]["__typename"] == "AnomalyDetectionFailure"
-        and result["anomalyDetectionInference"]["message"]
-        == params.model_version.minimum_required_records_msg
+        data["__typename"] == "AnomalyDetectionFailure"
+        and data["message"] == params.model_version.minimum_required_records_msg
     ):
         # Intercept failure in the case of not enough records, and return a pass to avoid
         # being too noisy with failures.
         return AssetCheckResult(
             passed=True,
             severity=AssetCheckSeverity.WARN,
             metadata=metadata,
-            description=result["anomalyDetectionInference"]["message"],
+            description=data["message"],
             asset_key=asset_key,
         )
-    raise DagsterCloudAnomalyDetectionFailed(
-        f"Anomaly detection failed: {result['anomalyDetectionInference']['message']}"
-    )
+    raise DagsterCloudAnomalyDetectionFailed(f"Anomaly detection failed: {data['message']}")


 def build_anomaly_detection_freshness_checks(
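The refactor reflects that the client returns the standard GraphQL envelope with the payload nested under a top-level "data" key. A minimal sketch of the shape the check now expects (field names taken from the diff; the values are illustrative only):

result = {
    "data": {
        "anomalyDetectionInference": {
            "__typename": "AnomalyDetectionSuccess",
            "response": {
                "overdue_seconds": 0.0,
                "overdue_deadline_timestamp": 1714000000.0,
            },
        }
    }
}

# Hoist the payload once instead of repeating the two-level lookup everywhere.
data = result["data"]["anomalyDetectionInference"]
assert data["__typename"] == "AnomalyDetectionSuccess"
response = data["response"]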

dagster-cloud/dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ def jaffle_shop_dbt_assets(
                 )
                 if row.bytes_billed or row.slots_ms:
                     cost_info = BigQueryCostInfo(
-                        asset_key, partition, row.job_id, row.bytes_billed, row.slots_ms
+                        asset_key, partition, row.job_id, row.slots_ms, row.bytes_billed
                     )
                     cost_by_asset[cost_info.asset_partition_key].append(cost_info)
             except:
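This one-line fix swaps the last two positional arguments, which only matters if BigQueryCostInfo declares slots_ms before bytes_billed. A sketch of that assumed declaration (the real class lives elsewhere in dagster_insights and is not shown in this commit):

from typing import NamedTuple, Optional

from dagster import AssetKey

# Assumed field order: the corrected call passes slots_ms before bytes_billed
# so the values land in the fields with matching names.
class BigQueryCostInfoSketch(NamedTuple):
    asset_key: AssetKey
    partition: Optional[str]
    job_id: Optional[str]
    slots_ms: int
    bytes_billed: int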
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "1.7.0"
+__version__ = "1!0+dev"

dagster-cloud/dagster_cloud/workspace/user_code_launcher/user_code_launcher.py

Lines changed: 46 additions & 12 deletions
@@ -84,7 +84,9 @@

 DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT = 180
 DEFAULT_MAX_TTL_SERVERS = 25
-ACTIVE_AGENT_HEARTBEAT_INTERVAL = 600
+ACTIVE_AGENT_HEARTBEAT_INTERVAL = int(
+    os.getenv("DAGSTER_CLOUD_ACTIVE_AGENT_HEARTBEAT_INVERAL", "600")
+)


 USER_CODE_LAUNCHER_RECONCILE_SLEEP_SECONDS = 1
@@ -340,6 +342,7 @@ def __init__(
         self._desired_entries: Dict[DeploymentAndLocation, UserCodeLauncherEntry] = {}
         self._actual_entries: Dict[DeploymentAndLocation, UserCodeLauncherEntry] = {}
         self._last_refreshed_actual_entries = 0
+        self._last_cleaned_up_dangling_code_servers = 0
         self._metadata_lock = threading.Lock()

         self._upload_locations: Set[DeploymentAndLocation] = set()
@@ -457,7 +460,9 @@ def start(self, run_reconcile_thread=True, run_metrics_thread=True):
                 "Not starting run worker monitoring, because it's not supported on this agent."
             )

-        self._graceful_cleanup_servers()
+        self._graceful_cleanup_servers(
+            include_own_servers=True  # shouldn't be any of our own servers at this point, but won't hurt either
+        )

         if run_reconcile_thread:
             self._reconcile_grpc_metadata_thread = threading.Thread(
@@ -893,13 +898,15 @@ def _graceful_remove_server_handle(self, server_handle: ServerHandle):
         with self._grpc_servers_lock:
             self._pending_delete_grpc_server_handles.discard(server_handle)

-    def _cleanup_servers(self, active_agent_ids: Set[str]) -> None:
+    def _cleanup_servers(self, active_agent_ids: Set[str], include_own_servers: bool) -> None:
         """Remove all servers, across all deployments and locations."""
         with ThreadPoolExecutor() as executor:
             futures = []
             for handle in self._list_server_handles():
                 self._logger.info(f"Attempting to cleanup server {handle}")
-                if self._can_cleanup_server(handle, active_agent_ids):
+                if self._can_cleanup_server(
+                    handle, active_agent_ids, include_own_servers=include_own_servers
+                ):
                     self._logger.info(f"Can remove server {handle}. Cleaning up.")
                     futures.append(executor.submit(self._remove_server_handle, handle))
                 else:
@@ -924,7 +931,9 @@ def get_agent_id_for_server(self, handle: ServerHandle) -> Optional[str]:
     def get_server_create_timestamp(self, handle: ServerHandle) -> Optional[float]:
         """Returns the update_timestamp value from the given code server."""

-    def _can_cleanup_server(self, handle: ServerHandle, active_agent_ids: Set[str]) -> bool:
+    def _can_cleanup_server(
+        self, handle: ServerHandle, active_agent_ids: Set[str], include_own_servers: bool
+    ) -> bool:
         """Returns true if we can clean up the server identified by the handle without issues (server was started by this agent, or agent is no longer active)."""
         agent_id_for_server = self.get_agent_id_for_server(handle)
         self._logger.info(
@@ -933,11 +942,13 @@ def _can_cleanup_server(self, handle: ServerHandle, active_agent_ids: Set[str])
         )
         self._logger.info(f"All active agent ids: {active_agent_ids}")

-        # If this server was created by the current agent, it can always be cleaned up
-        # (or if its a legacy server that never set an agent ID)
-        if not agent_id_for_server or self._instance.instance_uuid == agent_id_for_server:
+        # if it's a legacy server that never set an agent ID:
+        if not agent_id_for_server:
             return True

+        if self._instance.instance_uuid == agent_id_for_server:
+            return include_own_servers
+
         try:
             update_timestamp_for_server = self.get_server_create_timestamp(handle)
         except:
@@ -958,16 +969,18 @@ def _can_cleanup_server(self, handle: ServerHandle, active_agent_ids: Set[str])

         return agent_id_for_server not in cast(Set[str], active_agent_ids)

-    def _graceful_cleanup_servers(self):  # ServerHandles
+    def _graceful_cleanup_servers(self, include_own_servers: bool):  # ServerHandles
         active_agent_ids = self.get_active_agent_ids()
         if not self.supports_get_current_runs_for_server_handle:
-            return self._cleanup_servers(active_agent_ids)
+            return self._cleanup_servers(active_agent_ids, include_own_servers=include_own_servers)

         handles = self._list_server_handles()
         servers_to_remove: List[ServerHandle] = []
         with self._grpc_servers_lock:
             for handle in handles:
-                if self._can_cleanup_server(handle, active_agent_ids):
+                if self._can_cleanup_server(
+                    handle, active_agent_ids, include_own_servers=include_own_servers
+                ):
                     servers_to_remove.append(handle)
         self._pending_delete_grpc_server_handles.update(servers_to_remove)
         for server_handle in servers_to_remove:
@@ -994,7 +1007,7 @@ def __exit__(self, exception_type, exception_value, traceback):
             self._reconcile_location_utilization_metrics_thread.join()

         if self._started:
-            self._graceful_cleanup_servers()
+            self._graceful_cleanup_servers(include_own_servers=True)

         super().__exit__(exception_value, exception_value, traceback)

@@ -1081,6 +1094,9 @@ def _reconcile_thread(self, shutdown_event):
                 f"Failure updating user code servers: {serializable_error_info_from_exc_info(sys.exc_info())}"
             )

+    def _cleanup_server_check_interval(self):
+        return int(os.getenv("DAGSTER_CLOUD_CLEANUP_SERVER_CHECK_INTERVAL", "1800"))
+
     def reconcile(self) -> None:
         with self._metadata_lock:
             desired_entries = (
@@ -1095,6 +1111,24 @@ def reconcile(self) -> None:

         now = pendulum.now("UTC").timestamp()

+        if not self._last_refreshed_actual_entries:
+            self._last_refreshed_actual_entries = now
+
+        if not self._last_cleaned_up_dangling_code_servers:
+            self._last_cleaned_up_dangling_code_servers = now
+
+        cleanup_server_check_interval = self._cleanup_server_check_interval()
+
+        if (
+            cleanup_server_check_interval
+            and now - self._last_cleaned_up_dangling_code_servers > cleanup_server_check_interval
+        ):
+            try:
+                self._graceful_cleanup_servers(include_own_servers=False)
+            except:
+                self._logger.exception("Failed to clean up dangling code servers.")
+            self._last_cleaned_up_dangling_code_servers = now
+
         if now - self._last_refreshed_actual_entries > ACTUAL_ENTRIES_REFRESH_INTERVAL:
             try:
                 self._refresh_actual_entries()
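The reconcile loop now interleaves a second timestamp-gated task alongside the existing refresh. The pattern generalizes as a small sketch (generic names and standalone helpers, not the launcher's API):

import os
import time

def make_interval_task(interval_env_var: str, default_seconds: str, fn):
    """Return a callable that runs fn at most once per configured interval."""
    last_run = 0.0

    def maybe_run() -> None:
        nonlocal last_run
        interval = int(os.getenv(interval_env_var, default_seconds))
        now = time.time()
        if not last_run:
            last_run = now  # the first tick only arms the timer, mirroring reconcile()
        if interval and now - last_run > interval:
            try:
                fn()
            except Exception:
                pass  # the launcher logs and continues; this sketch just swallows
            last_run = now

    return maybe_run

cleanup = make_interval_task(
    "DAGSTER_CLOUD_CLEANUP_SERVER_CHECK_INTERVAL", "1800", lambda: print("cleanup")
)
cleanup()  # arms the timer; later calls fire once the interval elapses

Setting the interval to 0 disables the task entirely, which matches the `cleanup_server_check_interval and ...` guard above.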
