Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions src/utils/log-resolver.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { gunzipSync, inflateRawSync } from "node:zlib";
import type { HarnessClient } from "../client/harness-client.js";
import { HarnessApiError } from "./errors.js";
import { createLogger } from "./logger.js";

const log = createLogger("log-resolver");
Expand Down Expand Up @@ -38,6 +39,7 @@ const EXTERNAL_STORAGE_HOSTS = new Set([

/**
 * Virtual-hosted-style S3 host pattern (bucket label first):
 * bucket.s3.amazonaws.com, bucket.s3.region.amazonaws.com or bucket.s3-region.amazonaws.com.
 * The bucket part may itself contain dots and hyphens; matching is case-insensitive.
 */
const S3_BUCKET_HOST_RE = /^[a-z0-9][a-z0-9.-]*\.s3([.-][a-z0-9-]+)?\.amazonaws\.com$/i;
/**
 * Path-style S3 host pattern (no bucket label in the host):
 * s3.amazonaws.com, s3.region.amazonaws.com or s3-region.amazonaws.com.
 * Matching is case-insensitive and anchored to the whole host string.
 */
const S3_PATH_STYLE_HOST_RE = /^s3([.-][a-z0-9-]+)?\.amazonaws\.com$/i;

function safeParseUrl(raw: string): URL | undefined {
try {
Expand All @@ -51,14 +53,18 @@ function isExternalStorageHost(host: string): boolean {
const h = host.toLowerCase();
if (EXTERNAL_STORAGE_HOSTS.has(h)) return true;
if (S3_BUCKET_HOST_RE.test(h)) return true;
if (S3_PATH_STYLE_HOST_RE.test(h)) return true;
if (h.endsWith(".storage.googleapis.com")) return true;
return false;
}

/**
 * Return `path` with a leading "/".
 *
 * A path that already starts with "/" is returned unchanged; otherwise a
 * single "/" is prepended. Nothing else in the string (query, repeated
 * slashes) is modified.
 *
 * @param path - Path, optionally followed by a query string.
 * @returns The same path, guaranteed to begin with "/".
 */
function normalizePath(path: string): string {
  if (path.startsWith("/")) {
    return path;
  }
  return "/" + path;
}

/**
* Detect pre-signed URLs that should be fetched directly without auth.
* GCS and S3 signed URLs carry their credentials in query params — adding
* extra auth headers or rewriting the host invalidates the signature.
* Detect signed URLs whose path/query must not be rewritten.
* Direct fetching is still limited to recognized external storage hosts.
*/
function isPresignedUrl(url: URL): boolean {
if (url.searchParams.has("X-Goog-Signature")) return true;
Expand Down Expand Up @@ -327,31 +333,36 @@ async function downloadBlobContent(
signal: AbortSignal,
): Promise<Response> {
const blobUrl = safeParseUrl(blobLink);
const isSignedBlobUrl = blobUrl ? isPresignedUrl(blobUrl) : false;
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relative signed links are still misclassified here. safeParseUrl() returns undefined for values like /some/blob/path?X-Amz-Signature=sig, so isSignedBlobUrl becomes false and the code later prepends /gateway/log-service and omits headerBasedScoping. Running that through the real HarnessClient produces a URL like https://<base>/gateway/log-service/some/blob/path?accountIdentifier=...&routingId=...&accountID=...&X-Amz-Signature=sig, which mutates the signed blob URL. Please detect signed relative paths too and add a regression test for that case.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still misses relative signed links. new URL(blobLink) throws for relative inputs like some/blob/path?X-Amz-Signature=sig&token=abc, so safeParseUrl() returns undefined, isSignedBlobUrl becomes false here, and the code falls into the /gateway/log-service rewrite below. With the real HarnessClient, that rewritten path then picks up accountIdentifier, routingId, and accountID, which mutates the signed query string and breaks the blob download. Please parse relative links against a dummy base or inspect the raw query string before deciding whether to preserve the path and enable headerBasedScoping.


if (blobUrl && (isExternalStorageHost(blobUrl.host) || isPresignedUrl(blobUrl))) {
if (blobUrl && isExternalStorageHost(blobUrl.hostname)) {
log.debug("Downloading log blob (direct)", { prefix, url: blobLink.slice(0, 80) });
try {
return await fetch(blobLink, { signal });
} catch (err) {
const cause = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
throw new Error(`Log download fetch failed for ${blobUrl.host}: ${cause}`);
throw new Error(`Log download fetch failed for ${blobUrl.hostname}: ${cause}`);
}
}

const rawPath = blobUrl ? blobUrl.pathname + blobUrl.search : blobLink;
const downloadPath = rawPath.startsWith(LOG_SERVICE_GATEWAY_PREFIX)
const rawPath = normalizePath(blobUrl ? blobUrl.pathname + blobUrl.search : blobLink);
const downloadPath = isSignedBlobUrl
? rawPath
: `${LOG_SERVICE_GATEWAY_PREFIX}${rawPath}`;
: rawPath.startsWith(LOG_SERVICE_GATEWAY_PREFIX)
? rawPath
: `${LOG_SERVICE_GATEWAY_PREFIX}${rawPath}`;
log.debug("Downloading log blob (client)", { prefix, path: downloadPath.slice(0, 80) });
try {
return await client.requestStream({
method: "GET",
path: downloadPath,
signal,
...(isSignedBlobUrl ? { headerBasedScoping: true } : {}),
});
} catch (err) {
if (err instanceof HarnessApiError) throw err;
const cause = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
throw new Error(`Log download fetch failed for ${blobUrl?.host ?? "harness"}: ${cause}`);
throw new Error(`Log download fetch failed for ${blobUrl?.hostname ?? "harness"}: ${cause}`);
}
}

Expand Down
18 changes: 18 additions & 0 deletions tasks/todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,21 @@
- Split query strings out of `RequestOptions.path` before base-path de-duplication and query assembly.
- Merged path query params into the generated `URLSearchParams` before applying `options.params`, preserving explicit override behavior.
- Verified with `pnpm test tests/client/harness-client.test.ts`, `pnpm typecheck`, `pnpm build`, and full `pnpm test`.

## Slack Bug Triage: Harness Log Blob Routing (2026-05-14)
- [x] Read Slack thread, PR #195 context, memories, and current resolver/client code
- [x] Add failing regression coverage for Harness-hosted pre-signed blob links
- [x] Patch `src/utils/log-resolver.ts` so only true external storage hosts are direct-fetched
- [x] Run focused log resolver tests, typecheck, build, and broader tests as appropriate
- [x] Commit, push, open PR, and reply in the original Slack thread

### Plan
- Treat PR #195 as the concrete report because the Slack thread has no follow-up screenshots or repro text.
- Keep the fix in `src/utils/log-resolver.ts`: route S3/GCS storage URLs directly, route Harness-hosted signed links through `HarnessClient.requestStream()`, and normalize the path passed to the client.
- Preserve `HarnessApiError` details from client-routed downloads so callers can still distinguish auth/permission failures.

### Review
- Confirmed the red tests failed on current code: Harness-hosted signed URLs were direct-fetched and failed, relative blob paths became `/gateway/log-servicesome/...`, and `HarnessApiError` details were wrapped in a generic `Error`.
- Updated `src/utils/log-resolver.ts` so only recognized external storage hosts are direct-fetched; non-storage signed URLs keep their raw path/query while routing through `HarnessClient.requestStream()` with header-based scoping to avoid appending query params.
- Added coverage for Harness-hosted `X-Amz-Signature` and `X-Goog-Signature` links, true S3 direct fetch including path-style regional hosts, relative path normalization, and preservation of `HarnessApiError`.
- Verified with `pnpm test tests/utils/log-resolver.test.ts`, `pnpm typecheck`, `pnpm build`, and full `pnpm test` (`53` files / `1312` tests).
97 changes: 97 additions & 0 deletions tests/utils/log-resolver.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { resolveLogContent } from "../../src/utils/log-resolver.js";
import { gzipSync, deflateRawSync } from "node:zlib";
import type { HarnessClient } from "../../src/client/harness-client.js";
import { HarnessApiError } from "../../src/utils/errors.js";

function makeClient(
requestFn: (...args: unknown[]) => unknown,
Expand Down Expand Up @@ -179,6 +180,102 @@ describe("resolveLogContent", () => {
);
});

// Regression: a signed link whose host is app.harness.io (not a storage host)
// must be downloaded via client.requestStream, never via a direct fetch.
it("REGRESSION routes Harness-hosted pre-signed blob URLs through client.requestStream", async () => {
const blobLink = "https://app.harness.io/some/blob/path?X-Amz-Signature=sig&token=abc";
const streamFn = vi.fn().mockResolvedValue(new Response('{"out":"log line 1"}', { status: 200 }));
const client = makeClient(
vi.fn().mockResolvedValue({ status: "success", link: blobLink }),
{ requestStream: streamFn },
);
// fetch is rigged to reject so that an accidental direct fetch cannot succeed.
fetchSpy.mockRejectedValue(new TypeError("fetch failed"));

const result = await resolveLogContent(client, "prefix");

expect(result).toContain("log line 1");
expect(fetchSpy).not.toHaveBeenCalled();
// The signed path and query are passed through unchanged (no gateway prefix),
// and header-based scoping is requested.
expect(streamFn).toHaveBeenCalledWith(
expect.objectContaining({
method: "GET",
path: "/some/blob/path?X-Amz-Signature=sig&token=abc",
headerBasedScoping: true,
}),
);
});

// Regression: same routing as the Harness-hosted case, but the link carries an
// explicit ":443" port and an X-Goog-Signature query parameter.
it("REGRESSION routes Harness-hosted pre-signed blob URLs with explicit ports through client.requestStream", async () => {
const blobLink = "https://app.harness.io:443/some/blob/path?X-Goog-Signature=sig&token=abc";
const streamFn = vi.fn().mockResolvedValue(new Response('{"out":"log line 1"}', { status: 200 }));
const client = makeClient(
vi.fn().mockResolvedValue({ status: "success", link: blobLink }),
{ requestStream: streamFn },
);
// fetch is rigged to reject so that an accidental direct fetch cannot succeed.
fetchSpy.mockRejectedValue(new TypeError("fetch failed"));

const result = await resolveLogContent(client, "prefix");

expect(result).toContain("log line 1");
expect(fetchSpy).not.toHaveBeenCalled();
// Only path + query reach the client; scheme, host and port are dropped.
expect(streamFn).toHaveBeenCalledWith(
expect.objectContaining({
method: "GET",
path: "/some/blob/path?X-Goog-Signature=sig&token=abc",
headerBasedScoping: true,
}),
);
});

// A virtual-hosted-style S3 link (bucket.s3.<region>.amazonaws.com) is fetched
// directly with the original URL and must not go through the Harness client.
it("direct-fetches true external storage pre-signed URLs", async () => {
const blobLink = "https://bucket.s3.us-west-2.amazonaws.com/logs.zip?X-Amz-Signature=sig";
const client = makeClient(vi.fn().mockResolvedValue({ status: "success", link: blobLink }));
fetchSpy.mockResolvedValue(new Response('{"out":"log line 1"}', { status: 200 }));

const result = await resolveLogContent(client, "prefix");

expect(result).toContain("log line 1");
// The exact, unmodified link is what gets fetched.
expect(fetchSpy).toHaveBeenCalledWith(blobLink, expect.any(Object));
expect(client.requestStream).not.toHaveBeenCalled();
});

// A path-style S3 link (s3.<region>.amazonaws.com/<bucket>/...) is also treated
// as external storage: fetched directly, bypassing the Harness client.
it("direct-fetches path-style S3 pre-signed URLs from any region", async () => {
const blobLink = "https://s3.eu-north-1.amazonaws.com/harness-logs/logs.zip?X-Amz-Signature=sig";
const client = makeClient(vi.fn().mockResolvedValue({ status: "success", link: blobLink }));
fetchSpy.mockResolvedValue(new Response('{"out":"log line 1"}', { status: 200 }));

const result = await resolveLogContent(client, "prefix");

expect(result).toContain("log line 1");
// The exact, unmodified link is what gets fetched.
expect(fetchSpy).toHaveBeenCalledWith(blobLink, expect.any(Object));
expect(client.requestStream).not.toHaveBeenCalled();
});

// A relative, unsigned link without a leading "/" must gain one before the
// gateway prefix is added, so the prefix and path are not glued together.
it("normalizes relative blob paths before adding the gateway prefix", async () => {
const streamFn = vi.fn().mockResolvedValue(new Response('{"out":"log line 1"}', { status: 200 }));
const client = makeClient(
vi.fn().mockResolvedValue({ status: "success", link: "some/blob/path?token=abc" }),
{ requestStream: streamFn },
);

const result = await resolveLogContent(client, "prefix");

expect(result).toContain("log line 1");
// Expected: "/gateway/log-service" + "/" + original relative path and query.
expect(streamFn).toHaveBeenCalledWith(
expect.objectContaining({
method: "GET",
path: "/gateway/log-service/some/blob/path?token=abc",
}),
);
});

// When the client-routed download rejects with a HarnessApiError, the caller
// must receive that same error object rather than a generic wrapper.
it("preserves HarnessApiError details from client-routed blob downloads", async () => {
const apiError = new HarnessApiError("Forbidden", 403, "FORBIDDEN", "corr-123");
const client = makeClient(
vi.fn().mockResolvedValue({ status: "success", link: "https://logs.example.com/blob" }),
{ requestStream: vi.fn().mockRejectedValue(apiError) },
);

// toBe checks identity: the rejection must be the very same instance.
await expect(resolveLogContent(client, "prefix")).rejects.toBe(apiError);
});

it("throws when log file exceeds max size", async () => {
const bigContent = "x".repeat(1024);
const streamFn = vi.fn().mockResolvedValue(new Response(bigContent, {
Expand Down