Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/kamal/cli/app.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ def boot
end
end

#  Primary hosts and roles are returned first, so they can open the barrier
barrier = Kamal::Cli::Healthcheck::Barrier.new if KAMAL.roles.many?

on(KAMAL.hosts, **KAMAL.boot_strategy) do |host|
KAMAL.roles_on(host).each do |role|
Kamal::Cli::App::Boot.new(host, role, version, self).run
Kamal::Cli::App::Boot.new(host, role, self, version, barrier).run
end
end

# Tag once the app has booted on all hosts
on(KAMAL.hosts) do |host|
execute *KAMAL.auditor.record("Tagging #{KAMAL.config.absolute_image} as the latest image"), verbosity: :debug
execute *KAMAL.app.tag_latest_image
Expand Down
85 changes: 68 additions & 17 deletions lib/kamal/cli/app/boot.rb
Original file line number Diff line number Diff line change
@@ -1,38 +1,37 @@
class Kamal::Cli::App::Boot
attr_reader :host, :role, :version, :sshkit
delegate :execute, :capture_with_info, :info, to: :sshkit
delegate :uses_cord?, :assets?, to: :role
attr_reader :host, :role, :version, :barrier, :sshkit
delegate :execute, :capture_with_info, :capture_with_pretty_json, :info, :error, to: :sshkit
delegate :uses_cord?, :assets?, :running_traefik?, to: :role

def initialize(host, role, version, sshkit)
def initialize(host, role, sshkit, version, barrier)
@host = host
@role = role
@version = version
@barrier = barrier
@sshkit = sshkit
end

def run
old_version = old_version_renamed_if_clashing

start_new_version
wait_at_barrier if queuer?

begin
start_new_version
rescue => e
close_barrier if gatekeeper?
stop_new_version
raise
end

release_barrier if gatekeeper?

if old_version
stop_old_version(old_version)
end
end

private
def app
@app ||= KAMAL.app(role: role, host: host)
end

def auditor
@auditor = KAMAL.auditor(role: role)
end

def audit(message)
execute *auditor.record(message), verbosity: :debug
end

def old_version_renamed_if_clashing
if capture_with_info(*app.container_id_for_version(version), raise_on_non_zero_exit: false).present?
renamed_version = "#{version}_replaced_#{SecureRandom.hex(8)}"
Expand All @@ -46,12 +45,17 @@ def old_version_renamed_if_clashing

# Boots the container for +version+ and waits for the poller to report it ready.
#
# Steps, in order: record an audit entry, tie the cord file when the role
# uses one, run the container under a generated hostname, then poll the
# container status via Kamal::Cli::Healthcheck::Poller.wait_for_healthy.
def start_new_version
  audit "Booted app version #{version}"

  if uses_cord?
    execute(*app.tie_cord(role.cord_host_file))
  end

  # Hostname: first 51 characters of the host with trailing dots stripped,
  # followed by a random 12-character hex suffix.
  host_prefix = host.to_s[0...51].gsub(/\.+$/, '')
  hostname = "#{host_prefix}-#{SecureRandom.hex(6)}"
  execute(*app.run(hostname: hostname))

  Kamal::Cli::Healthcheck::Poller.wait_for_healthy(pause_after_ready: true) do
    capture_with_info(*app.status(version: version))
  end
end

# Stops the container for the version being booted. The command is executed
# with raise_on_non_zero_exit: false, so a non-zero exit does not raise here.
def stop_new_version
  stop_command = app.stop(version: version)
  execute(*stop_command, raise_on_non_zero_exit: false)
end

def stop_old_version(version)
if uses_cord?
cord = capture_with_info(*app.cord(version: version), raise_on_non_zero_exit: false).strip
Expand All @@ -65,4 +69,51 @@ def stop_old_version(version)

execute *app.clean_up_assets if assets?
end

# Opens the barrier and, only when this call was the one that opened it
# (barrier.open returned truthy), logs that other roles may now boot.
def release_barrier
  return unless barrier.open

  info "First #{KAMAL.primary_role} container is healthy on #{host}, booting other roles"
end

# Waits on the barrier before a non-primary role is booted.
#
# Logs a message before waiting and another once barrier.wait returns.
# If barrier.wait raises Kamal::Cli::Healthcheck::Error, logs that this role
# will not be booted on this host and re-raises the same error.
def wait_at_barrier
info "Waiting for the first healthy #{KAMAL.primary_role} container before booting #{role} on #{host}..."
barrier.wait
info "First #{KAMAL.primary_role} container is healthy, booting #{role} on #{host}..."
rescue Kamal::Cli::Healthcheck::Error
info "First #{KAMAL.primary_role} container is unhealthy, not booting #{role} on #{host}"
# Bare raise re-raises the rescued error so the caller still sees the failure.
raise
end

# Closes the barrier after the primary role failed to boot and, only when this
# call was the one that closed it (barrier.close returned truthy), reports the
# failure together with the container's logs and health log.
#
# Fetching those diagnostics is best-effort: this method is called from the
# rescue path of `run`, so a failing log command must not replace the original
# boot error or prevent the clean-up that follows it.
def close_barrier
  return unless barrier.close

  info "First #{KAMAL.primary_role} container is unhealthy on #{host}, not booting other roles"

  begin
    error capture_with_info(*app.logs(version: version))
    error capture_with_info(*app.container_health_log(version: version))
  rescue SSHKit::Command::Failed
    # e.g. the container was never created, so there is nothing to read logs from.
    error "Could not fetch logs for #{version}"
  end
end

# Whether this boot is for the primary role (role == KAMAL.primary_role).
# Used by gatekeeper? and queuer? to decide which side of the barrier this boot is on.
def barrier_role?
role == KAMAL.primary_role
end

# Memoizes the result of KAMAL.app for this boot's role and host.
def app
@app ||= KAMAL.app(role: role, host: host)
end

# Memoizes the result of KAMAL.auditor for this boot's role, so repeated
# `audit` calls reuse one auditor instead of requesting a new one each time
# (same pattern as `app`).
def auditor
  @auditor ||= KAMAL.auditor(role: role)
end

# Executes the command returned by auditor.record(message) at debug verbosity.
#
# message - text passed through to auditor.record.
def audit(message)
execute *auditor.record(message), verbosity: :debug
end

# Truthy when a barrier was supplied and this boot is for the primary role.
# `run` uses this to decide whether this boot opens (on success) or closes
# (on failure) the barrier.
def gatekeeper?
barrier && barrier_role?
end

# Truthy when a barrier was supplied and this boot is NOT for the primary role.
# `run` uses this to decide whether to wait at the barrier before booting.
def queuer?
barrier && !barrier_role?
end
end
21 changes: 0 additions & 21 deletions lib/kamal/cli/healthcheck.rb

This file was deleted.

31 changes: 31 additions & 0 deletions lib/kamal/cli/healthcheck/barrier.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# One-shot gate backed by a Concurrent::IVar.
#
# The barrier starts undecided. The first call to #open or #close decides it
# and returns true; every later call leaves the state untouched and returns
# false. #wait reads the decided state and raises if the barrier was closed.
class Kamal::Cli::Healthcheck::Barrier
  def initialize
    @ivar = Concurrent::IVar.new
  end

  # Decides the barrier as closed.
  # Returns true if this call set the state, false if it was already decided.
  def close
    set(false)
  end

  # Decides the barrier as open.
  # Returns true if this call set the state, false if it was already decided.
  def open
    set(true)
  end

  # Reads the barrier state (IVar#value waits until a value has been set).
  # Raises Kamal::Cli::Healthcheck::Error when the barrier was closed.
  def wait
    raise Kamal::Cli::Healthcheck::Error, "Halted at barrier" unless opened?
  end

  private
    def opened?
      @ivar.value
    end

    # Sets the state once; a second assignment is reported as false rather
    # than propagated as Concurrent::MultipleAssignmentError.
    def set(value)
      @ivar.set(value)
      true
    rescue Concurrent::MultipleAssignmentError
      false
    end
end
2 changes: 2 additions & 0 deletions lib/kamal/cli/healthcheck/error.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Error raised by the healthcheck poller when a container is not in the
# expected state, and by the barrier when a waiter is halted.
class Kamal::Cli::Healthcheck::Error < StandardError
end
9 changes: 4 additions & 5 deletions lib/kamal/cli/healthcheck/poller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ module Kamal::Cli::Healthcheck::Poller

TRAEFIK_UPDATE_DELAY = 5

class HealthcheckError < StandardError; end

def wait_for_healthy(pause_after_ready: false, &block)
attempt = 1
Expand All @@ -16,9 +15,9 @@ def wait_for_healthy(pause_after_ready: false, &block)
when "running" # No health check configured
sleep KAMAL.config.readiness_delay if pause_after_ready
else
raise HealthcheckError, "container not ready (#{status})"
raise Kamal::Cli::Healthcheck::Error, "container not ready (#{status})"
end
rescue HealthcheckError => e
rescue Kamal::Cli::Healthcheck::Error => e
if attempt <= max_attempts
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
sleep attempt
Expand All @@ -41,9 +40,9 @@ def wait_for_unhealthy(pause_after_ready: false, &block)
when "unhealthy"
sleep TRAEFIK_UPDATE_DELAY if pause_after_ready
else
raise HealthcheckError, "container not unhealthy (#{status})"
raise Kamal::Cli::Healthcheck::Error, "container not unhealthy (#{status})"
end
rescue HealthcheckError => e
rescue Kamal::Cli::Healthcheck::Error => e
if attempt <= max_attempts
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
sleep attempt
Expand Down
13 changes: 1 addition & 12 deletions lib/kamal/cli/main.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ def deploy
say "Ensure Traefik is running...", :magenta
invoke "kamal:cli:traefik:boot", [], invoke_options

if KAMAL.config.role(KAMAL.config.primary_role).running_traefik?
say "Ensure app can pass healthcheck...", :magenta
invoke "kamal:cli:healthcheck:perform", [], invoke_options
end

say "Detect stale containers...", :magenta
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)

Expand Down Expand Up @@ -77,9 +72,6 @@ def redeploy

run_hook "pre-deploy"

say "Ensure app can pass healthcheck...", :magenta
invoke "kamal:cli:healthcheck:perform", [], invoke_options

say "Detect stale containers...", :magenta
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)

Expand Down Expand Up @@ -228,9 +220,6 @@ def version
desc "env", "Manage environment files"
subcommand "env", Kamal::Cli::Env

desc "healthcheck", "Healthcheck application"
subcommand "healthcheck", Kamal::Cli::Healthcheck

desc "lock", "Manage the deploy lock"
subcommand "lock", Kamal::Cli::Lock

Expand All @@ -255,7 +244,7 @@ def container_available?(version)
raise "Container not found" unless container_id.present?
end
end
rescue SSHKit::Runner::ExecuteError => e
rescue SSHKit::Runner::ExecuteError, SSHKit::Runner::MultipleExecuteError => e
if e.message =~ /Container not found/
say "Error looking for container version #{version}: #{e.message}"
return false
Expand Down
8 changes: 8 additions & 0 deletions lib/kamal/commands/app/containers.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
module Kamal::Commands::App::Containers
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"

def list_containers
docker :container, :ls, "--all", *filter_args
end
Expand All @@ -20,4 +22,10 @@ def rename_container(version:, new_version:)
def remove_containers
docker :container, :prune, "--force", *filter_args
end

# Builds a piped command: look up the container id for the given version's
# container name, then feed it through xargs to `docker inspect` using
# DOCKER_HEALTH_LOG_FORMAT as the output format.
#
# version: - version whose container name is resolved via container_name.
def container_health_log(version:)
  find_container = container_id_for(container_name: container_name(version))
  inspect_health = xargs(docker(:inspect, "--format", DOCKER_HEALTH_LOG_FORMAT))

  pipe find_container, inspect_health
end
end
4 changes: 2 additions & 2 deletions lib/kamal/commands/app/logging.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module Kamal::Commands::App::Logging
def logs(since: nil, lines: nil, grep: nil)
def logs(version: nil, since: nil, lines: nil, grep: nil)
pipe \
current_running_container_id,
version ? container_id_for_version(version) : current_running_container_id,
"xargs docker logs#{" --since #{since}" if since}#{" --tail #{lines}" if lines} 2>&1",
("grep '#{grep}'" if grep)
end
Expand Down
1 change: 0 additions & 1 deletion lib/kamal/commands/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ class Base
delegate :sensitive, :argumentize, to: Kamal::Utils

DOCKER_HEALTH_STATUS_FORMAT = "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'"
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"

attr_accessor :config

Expand Down
59 changes: 0 additions & 59 deletions lib/kamal/commands/healthcheck.rb

This file was deleted.

2 changes: 1 addition & 1 deletion lib/kamal/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def sshkit


def healthcheck
{ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord" => "/tmp/kamal-cord", "log_lines" => 50 }.merge(raw_config.healthcheck || {})
{ "path" => "/up", "port" => 3000, "max_attempts" => 7, "cord" => "/tmp/kamal-cord", "log_lines" => 50 }.merge(raw_config.healthcheck || {})
end

def healthcheck_service
Expand Down
Loading