Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4ec7ece
Add Blacksmith sticky-disk build PoC for deploy binaries
claude Jun 4, 2026
5736cdd
Cut deploy-all-services binary builds over to Blacksmith sticky disk
claude Jun 4, 2026
0dc3bac
Move lambda builds onto Blacksmith and tighten matrix timing
claude Jun 4, 2026
d8d8937
Add branch-scoped push trigger to Blacksmith PoC for branch testing
claude Jun 4, 2026
7b0e8cb
Fix lambda build: drop login shell that reset dev-shell PATH
claude Jun 4, 2026
e28445b
Free /nix sticky disk before commit by stopping Nix daemon
claude Jun 4, 2026
c183615
Speed up cachix install: use flake-pinned nixpkgs, not nixpkgs-weekly
claude Jun 4, 2026
1341148
Remove branch-scoped push trigger from Blacksmith PoC
claude Jun 4, 2026
0a402ba
Parallelize all binary builds; tune shared ALB health check for fast …
claude Jun 4, 2026
f8ac243
Remove max-parallel cap on lambda builds too
claude Jun 4, 2026
aee76d8
Move deploy-services to Blacksmith runner
claude Jun 4, 2026
2a20215
Merge remote-tracking branch 'origin/main' into claude/gracious-thomp…
claude Jun 4, 2026
6fc89ea
Collapse COPY+chmod in svc Dockerfiles; lower lambda opt-level
claude Jun 4, 2026
dd166ec
Remove max-parallel cap on deploy-services
claude Jun 4, 2026
d817484
ci(deploy): hand build artifacts to deploy over Blacksmith sticky disks
claude Jun 5, 2026
8a8a2db
ci(deploy): cache Pulumi provider plugins on a sticky disk
claude Jun 5, 2026
2cea004
ci(lambda): build a service's handlers in one cargo-lambda invocation
claude Jun 5, 2026
6723c1a
ci(lambda): surface sccache stats in the deploy lambda build
claude Jun 5, 2026
f9df065
spike(lambda): build a Rust lambda via crane + cargo-zigbuild
claude Jun 5, 2026
0841115
ci(spike): workflow to build the crane+zigbuild lambda on Blacksmith
claude Jun 5, 2026
abf10dd
ci(spike): trigger lambda crane spike on push to the feature branch
claude Jun 5, 2026
e11e152
fix(lambda spike): build dep closure with plain cargo, pin glibc only…
claude Jun 5, 2026
a2e7ce7
fix(lambda spike): force aws-lc-sys cmake builder under zig
claude Jun 5, 2026
7d50bbe
lambda(rollout): generate all handler packages; validate all via crane
claude Jun 5, 2026
4f7739f
ci(deploy): build lambdas via crane/nix on a dedicated two-disk topology
claude Jun 5, 2026
767bb92
ci(spike): drop the temporary push trigger from the lambda validation…
claude Jun 5, 2026
2ae9384
ci(deploy): drop Cachix fallback from the warm/build jobs
claude Jun 5, 2026
22f58d1
ci(deploy): remove dead cachix watch-store blocks
claude Jun 5, 2026
ce05de4
Merge remote-tracking branch 'origin/main' into claude/gracious-thomp…
claude Jun 5, 2026
9a6f60d
ci(deploy): persist docker layer cache on a Blacksmith builder
claude Jun 5, 2026
d717eb2
Merge remote-tracking branch 'origin/main' into claude/gracious-thomp…
claude Jun 5, 2026
dc56c51
Merge remote-tracking branch 'origin/main' into claude/gracious-thomp…
claude Jun 8, 2026
8138c60
Merge remote-tracking branch 'origin/claude/gracious-thompson-LXyqN' …
claude Jun 8, 2026
1305738
fix(ffmpeg): ffmpeg lambda layer nix
synoet Jun 8, 2026
4a2e3a7
fixes
synoet Jun 9, 2026
afc068b
Merge branch 'main' of github.com:macro-inc/macro into claude/graciou…
synoet Jun 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions .github/actions/deploy-cloud-storage-pulumi/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ inputs:
description: 'Whether to setup docker'
required: false
default: 'true'
use-blacksmith-builder:
description: >-
Use a Blacksmith buildkit builder whose layer cache lives on a per-Dockerfile
sticky disk (persists the heavy base layers across runs). Only valid on
Blacksmith runners; defaults to the stock docker/setup-buildx-action.
required: false
default: 'false'
use-lfs:
description: 'Whether to checkout LFS content'
required: false
Expand Down Expand Up @@ -48,6 +55,14 @@ inputs:
description: 'Optional GitHub artifact containing built Lambda artifacts under target/lambda'
required: false
default: ''
prebuilt-binaries-tar:
description: 'Optional path to a prebuilt-binaries.tar.gz already present on disk (e.g. a Blacksmith sticky-disk handoff). When set, takes precedence over prebuilt-binaries-artifact and skips the GitHub artifact download.'
required: false
default: ''
lambda-artifacts-tar:
description: 'Optional path to a lambda-artifacts.tar.gz already present on disk (e.g. a Blacksmith sticky-disk handoff). When set, takes precedence over lambda-artifacts and skips the GitHub artifact download.'
required: false
default: ''
cloud-storage-service-name:
description: 'Optional service key from .github/services-config.json; builds Lambda artifacts inline when lambda-artifacts is not provided'
required: false
Expand Down Expand Up @@ -77,12 +92,16 @@ runs:
path: rust/cloud-storage/prebuilt-artifact

- name: Extract prebuilt service binaries
if: ${{ inputs.prebuilt-binaries-artifact != '' }}
if: ${{ inputs.prebuilt-binaries-artifact != '' || inputs.prebuilt-binaries-tar != '' }}
shell: bash
env:
TAR_PATH: ${{ inputs.prebuilt-binaries-tar }}
run: |
set -euo pipefail
mkdir -p rust/cloud-storage/prebuilt
tar -xzf rust/cloud-storage/prebuilt-artifact/prebuilt-binaries.tar.gz -C rust/cloud-storage/prebuilt
# Prefer an on-disk tar (sticky-disk handoff); fall back to the downloaded artifact.
src="${TAR_PATH:-rust/cloud-storage/prebuilt-artifact/prebuilt-binaries.tar.gz}"
tar -xzf "$src" -C rust/cloud-storage/prebuilt

- name: Download Lambda artifacts
if: ${{ inputs.lambda-artifacts != '' }}
Expand All @@ -92,14 +111,18 @@ runs:
path: rust/cloud-storage/lambda-artifact

- name: Extract Lambda artifacts
if: ${{ inputs.lambda-artifacts != '' }}
if: ${{ inputs.lambda-artifacts != '' || inputs.lambda-artifacts-tar != '' }}
shell: bash
env:
TAR_PATH: ${{ inputs.lambda-artifacts-tar }}
run: |
set -euo pipefail
tar -xzf rust/cloud-storage/lambda-artifact/lambda-artifacts.tar.gz -C rust/cloud-storage
# Prefer an on-disk tar (sticky-disk handoff); fall back to the downloaded artifact.
src="${TAR_PATH:-rust/cloud-storage/lambda-artifact/lambda-artifacts.tar.gz}"
tar -xzf "$src" -C rust/cloud-storage

- name: Check Lambda config for inline build
if: ${{ inputs.lambda-artifacts == '' && inputs.cloud-storage-service-name != '' }}
if: ${{ inputs.lambda-artifacts == '' && inputs.lambda-artifacts-tar == '' && inputs.cloud-storage-service-name != '' }}
id: check-lambdas
shell: bash
env:
Expand All @@ -109,24 +132,32 @@ runs:
echo "has_lambdas=$has_lambdas" >> "$GITHUB_OUTPUT"

- name: Configure Cachix
if: ${{ inputs.lambda-artifacts == '' && inputs.cloud-storage-service-name != '' && steps.check-lambdas.outputs.has_lambdas == 'true' }}
if: ${{ inputs.lambda-artifacts == '' && inputs.lambda-artifacts-tar == '' && inputs.cloud-storage-service-name != '' && steps.check-lambdas.outputs.has_lambdas == 'true' }}
uses: ./.github/actions/setup-cachix
with:
cachix-auth-token: ${{ inputs.cachix-auth-token }}
sccache-bucket: ${{ inputs.sccache-bucket }}

- name: Build Lambda artifacts inline
if: ${{ inputs.lambda-artifacts == '' && inputs.cloud-storage-service-name != '' && steps.check-lambdas.outputs.has_lambdas == 'true' }}
if: ${{ inputs.lambda-artifacts == '' && inputs.lambda-artifacts-tar == '' && inputs.cloud-storage-service-name != '' && steps.check-lambdas.outputs.has_lambdas == 'true' }}
shell: bash
env:
SERVICE: ${{ inputs.cloud-storage-service-name }}
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-key }}
run: .github/scripts/build-cloud-storage-lambdas.sh

# setup docker
# setup docker -- a Blacksmith builder keeps the buildkit layer cache on a
# per-Dockerfile sticky disk (so heavy bases like LibreOffice/Collabora stay
# warm across runs instead of re-pulling ~hundreds of MB from ECR each time);
# otherwise the stock buildx builder. Pulumi's docker-build provider uses
# whichever is set as the default builder.
- name: Set up Blacksmith Docker builder
if: ${{ inputs.use-docker == 'true' && inputs.use-blacksmith-builder == 'true' }}
uses: useblacksmith/setup-docker-builder@v1

- uses: docker/setup-buildx-action@v3
if: ${{ inputs.use-docker == 'true' }}
if: ${{ inputs.use-docker == 'true' && inputs.use-blacksmith-builder != 'true' }}

# install bun
- uses: oven-sh/setup-bun@v2
Expand Down Expand Up @@ -165,4 +196,4 @@ runs:
DD_APP_KEY: ${{ inputs.dd-app-key }}
DD_API_KEY: ${{ inputs.dd-api-key }}
DD_HOST: ${{ inputs.dd-host }}
USE_PREBUILT_SERVICE_BINARIES: ${{ inputs.prebuilt-binaries-artifact != '' && 'true' || 'false' }}
USE_PREBUILT_SERVICE_BINARIES: ${{ (inputs.prebuilt-binaries-artifact != '' || inputs.prebuilt-binaries-tar != '') && 'true' || 'false' }}
12 changes: 10 additions & 2 deletions .github/actions/setup-cachix/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,16 @@ runs:
}

if ! command -v cachix >/dev/null 2>&1; then
if ! retry 3 nix profile add nixpkgs#cachix; then
echo "::warning::Failed to install Cachix; continuing without the Cachix binary cache."
# Install cachix from the repo flake's pinned nixpkgs (already on the
# /nix sticky disk) via --inputs-from, instead of the default registry
# `nixpkgs`, which Determinate resolves to an unpinned nixpkgs-weekly
# and re-fetches/re-evaluates (~20s) on every run.
flake_dir="${GITHUB_WORKSPACE:-$PWD}"
if ! retry 3 nix profile add --inputs-from "$flake_dir" nixpkgs#cachix; then
echo "::warning::Failed to install Cachix from the pinned flake; falling back to registry nixpkgs."
if ! retry 3 nix profile add nixpkgs#cachix; then
echo "::warning::Failed to install Cachix; continuing without the Cachix binary cache."
fi
fi
fi

Expand Down
86 changes: 86 additions & 0 deletions .github/actions/setup-nix/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
name: Setup Nix
description: >-
  Ensure a working Nix (flakes enabled, systemd daemon) is available on a
  runner that does not ship Nix preinstalled (e.g. Blacksmith). Idempotent:
  when /nix is already populated from a sticky disk it only re-initialises the
  daemon and config rather than reinstalling the store.

inputs:
  extra-substituters:
    description: Space-separated extra binary cache substituters to trust.
    required: false
    default: ''
  extra-trusted-public-keys:
    description: Space-separated public keys for the extra substituters.
    required: false
    default: ''

runs:
  using: composite
  steps:
    - name: Install or re-initialise Nix
      shell: bash
      env:
        EXTRA_SUBSTITUTERS: ${{ inputs.extra-substituters }}
        EXTRA_TRUSTED_PUBLIC_KEYS: ${{ inputs.extra-trusted-public-keys }}
      run: |
        set -euo pipefail

        # Everything this step looks for lives under the default Nix profile.
        # A warm sticky disk mounted at /nix brings the store and profiles
        # back, but /etc/nix and the systemd units live outside /nix and are
        # missing on a fresh runner, so those are the only bits repaired here.
        profile_dir="/nix/var/nix/profiles/default"
        nix_bin="$profile_dir/bin/nix"
        unit_dir="$profile_dir/lib/systemd/system"

        if [[ ! -x "$nix_bin" ]]; then
          # Cold disk: nothing usable under /nix, run the Determinate installer.
          echo "No Nix store found on sticky disk; performing a full install."
          curl --http1.1 --retry 5 --retry-delay 5 --retry-all-errors \
            --proto '=https' --tlsv1.2 -sSf -L https://install.determinate.systems/nix \
            | sh -s -- install linux --no-confirm --init systemd \
              --extra-conf "experimental-features = nix-command flakes" \
              --extra-conf "trusted-users = root runner"
        else
          echo "Found existing Nix store on sticky disk; re-initialising daemon/config only."

          # Rewrite the minimal config (overwrites any existing nix.conf).
          sudo mkdir -p /etc/nix
          printf '%s\n' \
            "experimental-features = nix-command flakes" \
            "trusted-users = root runner" \
            "build-users-group = nixbld" \
            | sudo tee /etc/nix/nix.conf >/dev/null

          # The build group and its users are host state, not /nix state.
          # Creation is best-effort: failures are deliberately ignored.
          if ! getent group nixbld >/dev/null; then
            sudo groupadd -r nixbld || true
            nologin_path="$(command -v nologin || echo /sbin/nologin)"
            for ((n = 1; n <= 10; n++)); do
              sudo useradd -r -g nixbld -G nixbld -d /var/empty -s "$nologin_path" \
                "nixbld$n" 2>/dev/null || true
            done
          fi

          # Copy the daemon units shipped in the profile back into systemd,
          # then (best-effort) enable and start the daemon.
          if [[ -e "$unit_dir/nix-daemon.service" ]]; then
            sudo cp "$unit_dir"/nix-daemon.* /etc/systemd/system/ || true
            sudo systemctl daemon-reload || true
          fi
          sudo systemctl enable --now nix-daemon.socket nix-daemon.service 2>/dev/null || true
        fi

        # Expose nix to later (non-login) steps via GITHUB_PATH / GITHUB_ENV,
        # and to the remainder of this step via the exported variables.
        printf '%s\n' "/nix/var/nix/profiles/default/bin" >> "$GITHUB_PATH"
        export PATH="/nix/var/nix/profiles/default/bin:$PATH"
        export NIX_REMOTE=daemon
        printf '%s\n' "NIX_REMOTE=daemon" >> "$GITHUB_ENV"

        # Optional fallback substituters (e.g. Cachix) so a cold sticky disk
        # can pull prebuilt artifacts instead of compiling from source. The
        # daemon is restarted (best-effort) after the config is appended.
        if [[ -n "${EXTRA_SUBSTITUTERS}" ]]; then
          printf '%s\n' \
            "extra-substituters = ${EXTRA_SUBSTITUTERS}" \
            "extra-trusted-public-keys = ${EXTRA_TRUSTED_PUBLIC_KEYS}" \
            | sudo tee -a /etc/nix/nix.conf >/dev/null
          sudo systemctl restart nix-daemon.service 2>/dev/null || true
        fi

        # Smoke check: fail the step if no nix binary can report its version.
        "$nix_bin" --version || nix --version
24 changes: 24 additions & 0 deletions .github/actions/teardown-nix/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Teardown Nix
description: >-
  Stop the Nix daemon(s) so a /nix sticky disk can be unmounted and committed.
  The Determinate Nix daemon runs out of /nix, which keeps the mount busy and
  makes the Blacksmith sticky-disk commit fail with "target is busy". Run this
  as the final (always()) step of any job that mounts /nix as a sticky disk.

runs:
  using: composite
  steps:
    - name: Stop Nix daemons holding /nix
      shell: bash
      run: |
        # Teardown is strictly best-effort: errexit is switched off and the
        # step always exits 0, so a failure here can never fail the job.
        set +e

        # Every unit that may be running out of /nix; units that do not exist
        # on this runner simply fail to stop and are ignored.
        nix_units=(
          nix-daemon.service
          nix-daemon.socket
          determinate-nixd.service
          determinate-nixd.socket
        )
        for nix_unit in "${nix_units[@]}"; do
          sudo systemctl stop "$nix_unit" 2>/dev/null
        done

        # Backstop: kill whatever still holds a handle on the /nix mount so
        # the umount can succeed, then flush pending writes.
        sudo fuser -km /nix 2>/dev/null
        sync
        exit 0
61 changes: 61 additions & 0 deletions .github/scripts/build-cloud-storage-lambdas-nix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bash
set -euo pipefail

# Build all of a service's Lambda handlers via the crane + cargo-zigbuild nix
# packages (.#deploy-lambda-<name>), and assemble the target/lambda/<name>/ zip
# artifact layout the deploy action consumes -- identical to what the old
# cargo-lambda script produced, so nothing downstream changes.
#
# Unlike the cargo-lambda path, this never recompiles unchanged handlers: nix is
# content-addressed, so an unchanged handler is a pure cache hit (substituted
# from the warm /nix lambda disk or Cachix). Independent handler derivations
# also build in parallel within the single `nix build` invocation.
#
# Environment:
#   SERVICE            (required) service key under .services in CONFIG_PATH.
#   OUTPUT_DIR         staging directory, wiped and recreated on every run
#                      (default: lambda-artifacts).
#   CONFIG_PATH        services config (default: .github/services-config.json).
#   CACHIX_CACHE_NAME  optional; when set and `cachix` is on PATH, a background
#                      `cachix watch-store` runs for the duration of the build.
#
# Output: lambda-artifacts.tar.gz in the current directory, containing
# target/lambda/<name>/ for every handler listed in deploy_lambdas.
# Exits 0 without producing a tarball when the service has no deploy_lambdas.

SERVICE="${SERVICE:?SERVICE is required}"
OUTPUT_DIR="${OUTPUT_DIR:-lambda-artifacts}"
CONFIG_PATH="${CONFIG_PATH:-.github/services-config.json}"

# Print a diagnostic to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort with a message naming the artifact when an expected file is absent,
# rather than letting a bare `test -f` end the script silently under `set -e`.
require_artifact() {
  local path=$1
  [[ -f "$path" ]] || die "Expected Lambda artifact is missing: $path"
}

if ! jq -e --arg service "$SERVICE" '.services | has($service)' "$CONFIG_PATH" >/dev/null; then
  die "Service '$SERVICE' not found in $CONFIG_PATH"
fi

# Run jq in a plain command substitution so that a jq failure aborts the script
# (set -e). Inside a process substitution its exit status would be discarded and
# a failure would look like "no lambdas configured" followed by a clean exit.
lambda_list="$(jq -r --arg service "$SERVICE" '.services[$service].deploy_lambdas[]? // empty' "$CONFIG_PATH")"
LAMBDAS=()
if [[ -n "$lambda_list" ]]; then
  mapfile -t LAMBDAS <<<"$lambda_list"
fi

if [[ ${#LAMBDAS[@]} -eq 0 ]]; then
  echo "No deploy_lambdas configured for $SERVICE"
  exit 0
fi

echo "Building Lambda artifacts for $SERVICE via nix: ${LAMBDAS[*]}"

# Optionally run `cachix watch-store` in the background while building; the
# EXIT trap stops it and reaps it on every exit path.
cachix_pid=
stop_cachix_watch() {
  if [[ -n "${cachix_pid:-}" ]]; then
    kill "$cachix_pid" 2>/dev/null || true
    wait "$cachix_pid" 2>/dev/null || true
  fi
}
if command -v cachix >/dev/null 2>&1 && [[ -n "${CACHIX_CACHE_NAME:-}" ]]; then
  cachix watch-store "$CACHIX_CACHE_NAME" >/tmp/cachix-watch-store.log 2>&1 &
  cachix_pid=$!
  trap stop_cachix_watch EXIT
fi

# Build every handler for this service in one nix invocation: independent
# derivations build in parallel, and unchanged ones are pure cache hits.
installables=()
for lambda in "${LAMBDAS[@]}"; do
  installables+=(".#deploy-lambda-${lambda}")
done
nix build --no-link --print-build-logs "${installables[@]}"

# Start from an empty staging directory. A previous run leaves copies made with
# `cp -a` from the Nix store, which keep the store's read-only permission bits;
# restore owner write permission first so the removal cannot fail on them.
if [[ -e "$OUTPUT_DIR" ]]; then
  chmod -R u+w -- "$OUTPUT_DIR" 2>/dev/null || true
fi
rm -rf -- "${OUTPUT_DIR:?}"
mkdir -p -- "$OUTPUT_DIR/target/lambda"

# Assemble target/lambda/<name>/*.zip from each handler's store path.
for lambda in "${LAMBDAS[@]}"; do
  # Already built above, so this just resolves the (cached) out path.
  out="$(nix build --no-link --print-out-paths ".#deploy-lambda-${lambda}")"
  dest="$OUTPUT_DIR/target/lambda/$lambda"
  mkdir -p -- "$dest"
  cp -a -- "$out/$lambda/." "$dest/"

  require_artifact "$dest/bootstrap.zip"
  # This handler must additionally ship the ffmpeg layer archive.
  if [[ "$lambda" == "call_recording_preview_handler" ]]; then
    require_artifact "$dest/ffmpeg-layer.zip"
  fi
done

tar -C "$OUTPUT_DIR" -czf lambda-artifacts.tar.gz target
Loading
Loading