gristlabs
diff --git a/‎.buildkite/hooks/post-command
Lines changed: 65 additions & 15 deletions b/‎.buildkite/hooks/post-command
Lines changed: 65 additions & 15 deletions
diff --git a/‎.buildkite/hooks/pre-command
Lines changed: 19 additions & 0 deletions b/‎.buildkite/hooks/pre-command
Lines changed: 19 additions & 0 deletions
diff --git a/‎.buildkite/pipeline.yaml
Lines changed: 15 additions & 0 deletions b/‎.buildkite/pipeline.yaml
Lines changed: 15 additions & 0 deletions
diff --git a/‎.buildkite/summarize.sh
Lines changed: 52 additions & 0 deletions b/‎.buildkite/summarize.sh
Lines changed: 52 additions & 0 deletions
@@ -1,24 +1,74 @@
 # Upload test logs on failure, if there are any.
-if [[ "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0" ]]; then
-  declare log_count=0
-  for log in $(make testlogs 2>/dev/null | sort | uniq); do
-    buildkite-agent artifact upload "${log}"
-    log_count=$((${log_count}+1))
-    # N.B. If *all* tests fail due to some common cause, then we will
-    # end up spending way too much time uploading logs. Instead, we just
-    # upload the first 100 and stop. That is hopefully enough to debug.
-    if [[ "${log_count}" -ge 100 ]]; then
-      echo "Only uploaded first 100 failures; skipping the rest."
-      break
-    fi
-  done
+if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
+  # Generate a metafile that ends with .output, and contains all the
+  # test failures that have been uploaded. These will all be sorted and
+  # aggregated by a failure stage in the build pipeline.
+  declare output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.output)
+  # Only lines of `make testlogs` output containing "//" are considered; each
+  # is read as "<target> <log path>", with duplicates dropped by sort | uniq.
+  make -s testlogs 2>/dev/null | grep // | sort | uniq | (
+    declare log_count=0
+    # -r keeps any backslashes in targets or paths literal.
+    while read -r target log; do
+      if test -z "${target}"; then
+        continue
+      fi
+
+      # N.B. If *all* tests fail due to some common cause, then we will
+      # end up spending way too much time uploading logs. Instead, we just
+      # upload the first 10 and stop. That is hopefully enough to debug.
+      #
+      # Failures beyond that limit are still listed in the metadata, but
+      # their logs are not uploaded. The user should rerun locally.
+      log_count=$((${log_count}+1))
+      # log_count is already 1-based at this point, so the comparison must be
+      # strict for exactly the first 10 logs to be uploaded.
+      if test "${log_count}" -gt 10; then
+        echo " * ${target} (no upload)" | tee -a "${output}"
+      else
+        buildkite-agent artifact upload "${log}"
+        echo " * [${target}](artifact://${log#/})" | tee -a "${output}"
+      fi
+    done
+  )
+
+  # Upload if we had outputs.
+  if test -s "${output}"; then
+    buildkite-agent artifact upload "${output}"
+  fi
+  rm -rf "${output}"
+
   # Attempt to clear the cache and shut down.
   make clean || echo "make clean failed with code $?"
   make bazel-shutdown || echo "make bazel-shutdown failed with code $?"
 fi
 
+# Upload all profiles, and include in an annotation.
+if test -d /tmp/profile; then
+  # Metafile ending in .profile_output that lists every uploaded profile; the
+  # fragments are aggregated later by .buildkite/summarize.sh.
+  declare profile_output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.profile_output)
+  # Read one path per line rather than word-splitting $(find ...), so that
+  # paths containing spaces or glob characters are handled intact.
+  find /tmp/profile -name \*.pprof -print 2>/dev/null | sort | while IFS= read -r file; do
+    # Generate a link to speedscope, with a URL-encoded link to the BuildKite
+    # artifact location. Note that we do a fixed URL encode below, since
+    # the link can be uniquely determined. If the storage location changes,
+    # this schema may break and these links may stop working. The artifacts
+    # uploaded, however, will still work just fine.
+    profile_name="${file#/tmp/profile/}"
+    public_url="https://storage.googleapis.com/gvisor-buildkite/${BUILDKITE_BUILD_ID}/${BUILDKITE_JOB_ID}/${file#/}"
+    encoded_url=$(jq -rn --arg x "${public_url}" '$x|@uri')
+    encoded_title=$(jq -rn --arg x "${profile_name}" '$x|@uri')
+    profile_url="https://speedscope.app/#profileURL=${encoded_url}&title=${encoded_title}"
+    # stdin is the list of remaining paths; keep the uploader away from it.
+    buildkite-agent artifact upload "${file}" </dev/null
+    echo " * [${profile_name}](${profile_url}) ([pprof](artifact://${file#/}))" | tee -a "${profile_output}"
+  done
+
+  # Upload if we had outputs.
+  if test -s "${profile_output}"; then
+    buildkite-agent artifact upload "${profile_output}"
+  fi
+  rm -rf "${profile_output}"
+
+  # Remove stale profiles, which may be owned by root.
+  sudo rm -rf /tmp/profile
+fi
+
 # Kill any running containers (clear state).
 CONTAINERS="$(docker ps -q)"
-if ! [[ -z "${CONTAINERS}" ]]; then
+if ! test -z "${CONTAINERS}"; then
   docker container kill ${CONTAINERS} 2>/dev/null || true
-fi
+fi
@@ -1,3 +1,15 @@
+# Install packages we need. Docker must be installed and configured,
+# as should Go itself. We just install some extra bits and pieces.
+#
+# Arguments: the apt package names to install.
+# Keeps retrying until both `apt-get update` and `apt-get install` succeed.
+# It never gives up; it only reports the failure and pauses between attempts
+# instead of immediately re-running apt-get.
+function install_pkgs() {
+  until sudo apt-get update && sudo apt-get install -y "$@"; do
+    echo "apt-get failed; retrying in 5 seconds..." >&2
+    sleep 5
+  done
+}
+install_pkgs graphviz jq curl binutils gnupg gnupg-agent linux-libc-dev \
+  apt-transport-https ca-certificates software-properties-common
+
 # Setup for parallelization with PARTITION and TOTAL_PARTITIONS.
 export PARTITION=${BUILDKITE_PARALLEL_JOB:-0}
 PARTITION=$((${PARTITION}+1)) # 1-indexed, but PARALLEL_JOB is 0-indexed.
@@ -9,3 +21,10 @@ if test "${EXPERIMENTAL}" != "true"; then
   make sudo TARGETS=//runsc:runsc ARGS="install --experimental=true"
   sudo systemctl restart docker
 fi
+
+# Flag for benchmarks, derived from the branch: "true" only when the build
+# is running on master, "false" for every other branch.
+case "${BUILDKITE_BRANCH}" in
+  master) export BENCHMARKS_OFFICIAL=true ;;
+  *) export BENCHMARKS_OFFICIAL=false ;;
+esac
@@ -132,3 +132,18 @@ steps:
     command: make python3.7.3-runtime-tests
     parallelism: 10
     if: build.message =~ /VFS1/ || build.branch == "master"
+
+  # The final step here will aggregate data uploaded by all other steps into an
+  # annotation that will appear at the top of the build, with useful information.
+  #
+  # See .buildkite/summarize.sh and .buildkite/hooks/post-command for more.
+  - wait
+  # No-op step (its command is just "true"); it exists to carry the "wait" key
+  # that the Summarize step below names in depends_on.
+  - <<: *common
+    label: ":yawning_face: Wait"
+    command: "true"
+    key: "wait"
+  # Runs the summary script. allow_dependency_failure is set so that this step
+  # is still allowed to run when its "wait" dependency did not succeed.
+  # NOTE(review): presumably this is what lets the summary appear on failed
+  # builds -- confirm against Buildkite's wait/depends_on semantics.
+  - <<: *common
+    label: ":thisisfine: Summarize"
+    command: .buildkite/summarize.sh
+    allow_dependency_failure: true
+    depends_on: "wait"
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright 2020 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euxo pipefail
+
+# This script collects metadata fragments produced by individual test shards in
+# .buildkite/hooks/post-command, and aggregates these into a single annotation
+# that is posted to the build. In the future, this will include coverage.
+
+# Scratch locations. They start out empty so that the EXIT trap is safe to run
+# at any point, and are removed on every exit path (including failures that
+# abort the script via set -e).
+declare summary=""
+declare outputs=""
+declare profiles=""
+declare status="info"
+cleanup() {
+  rm -rf "${summary}" "${outputs}" "${profiles}"
+}
+trap cleanup EXIT
+
+# Start the summary.
+summary=$(mktemp --tmpdir summary.XXXXXX)
+
+# Download all outputs. The *.output fragments are only uploaded by shards
+# whose command failed, so a successful download marks the build summary as
+# an error.
+outputs=$(mktemp -d --tmpdir outputs.XXXXXX)
+if buildkite-agent artifact download '**/*.output' "${outputs}"; then
+  status="error"
+  echo "## Failures" >> "${summary}"
+  # -exec passes paths to cat directly, so whitespace in names is harmless.
+  find "${outputs}" -type f -exec cat {} + | sort >> "${summary}"
+fi
+
+# Attempt to find profiles, if there are any.
+profiles=$(mktemp -d --tmpdir profiles.XXXXXX)
+if buildkite-agent artifact download '**/*.profile_output' "${profiles}"; then
+  echo "## Profiles" >> "${summary}"
+  find "${profiles}" -type f -exec cat {} + | sort >> "${summary}"
+fi
+
+# Upload the final annotation, but only if something was collected.
+if [[ -s "${summary}" ]]; then
+  buildkite-agent annotate --style "${status}" < "${summary}"
+fi