diff --git a/library/helpers/getRawUrlPath.test.ts b/library/helpers/getRawUrlPath.test.ts new file mode 100644 index 000000000..1813674e4 --- /dev/null +++ b/library/helpers/getRawUrlPath.test.ts @@ -0,0 +1,47 @@ +import * as t from "tap"; +import { getRawUrlPath } from "./getRawUrlPath"; + +t.test("it returns the raw URL path", async (t) => { + t.equal(getRawUrlPath(""), "/"); + t.equal(getRawUrlPath("/"), "/"); + t.equal(getRawUrlPath("/?test=abc"), "/"); + t.equal(getRawUrlPath("#"), "/"); + t.equal(getRawUrlPath("https://example.com"), "/"); + + t.equal( + getRawUrlPath("https://example.com/path/to/resource"), + "/path/to/resource" + ); + t.equal( + getRawUrlPath("http://example.com/path/to/resource/"), + "/path/to/resource/" + ); + t.equal( + getRawUrlPath("https://example.com/path/to/resource/123"), + "/path/to/resource/123" + ); + t.equal( + getRawUrlPath("https://example.com/path/to/resource/123/456"), + "/path/to/resource/123/456" + ); + t.equal( + getRawUrlPath("https://example.com/path/to/resource/123/456/789"), + "/path/to/resource/123/456/789" + ); + t.equal( + getRawUrlPath( + "https://example.com/path/to/resource/123/456/789?query=string" + ), + "/path/to/resource/123/456/789" + ); + t.equal( + getRawUrlPath("https://example.com/path/to/resource/123/456/789#fragment"), + "/path/to/resource/123/456/789" + ); + t.equal( + getRawUrlPath( + "https://example.com/path/to/resource/123/456/789?query=string#fragment" + ), + "/path/to/resource/123/456/789" + ); +}); diff --git a/library/helpers/getRawUrlPath.ts b/library/helpers/getRawUrlPath.ts new file mode 100644 index 000000000..f83876739 --- /dev/null +++ b/library/helpers/getRawUrlPath.ts @@ -0,0 +1,22 @@ +export function getRawUrlPath(url: string): string { + let partialUrl = url; + + // Remove protocol (http://, https://, etc.) + const pathStart = partialUrl.indexOf("://"); + if (pathStart !== -1) partialUrl = partialUrl.slice(pathStart + 3); + + // Remove hostname and port + const slashIndex = partialUrl.indexOf("/"); + if (slashIndex === -1) return "/"; // only hostname given + partialUrl = partialUrl.slice(slashIndex); + + // Remove query and fragment + const queryIndex = partialUrl.indexOf("?"); + const hashIndex = partialUrl.indexOf("#"); + + let endIndex = partialUrl.length; + if (queryIndex !== -1) endIndex = Math.min(endIndex, queryIndex); + if (hashIndex !== -1) endIndex = Math.min(endIndex, hashIndex); + + return partialUrl.slice(0, endIndex) || "/"; +} diff --git a/library/sources/HTTPServer.test.ts b/library/sources/HTTPServer.test.ts index 9f5c71258..99c129413 100644 --- a/library/sources/HTTPServer.test.ts +++ b/library/sources/HTTPServer.test.ts @@ -792,7 +792,7 @@ t.test("it blocks path traversal in path", async (t) => { t.equal( response, - "Zen has blocked a path traversal attack: path.join(...) originating from url." + "Zen has blocked a path traversal attack originating from url.path." ); server.close(); resolve(); @@ -921,6 +921,56 @@ t.test( } ); +t.test( + "it blocks path traversal in path even if its not directly used", + async (t) => { + const server = http.createServer((req, res) => { + res.statusCode = 200; + res.end("Hello, world!"); + }); + + await new Promise((resolve) => { + server.listen(3328, async () => { + const response = await new Promise((resolve, reject) => { + // Directly using http.request with a url-like object to prevent path normalization that would remove /../ + const req = http.request( + { + hostname: "localhost", + port: 3328, + path: "/../package.json", // Path traversal attempt + method: "GET", + }, + (res) => { + let data = ""; + + res.on("data", (chunk) => { + data += chunk; + }); + + res.on("end", () => { + resolve(data); + }); + } + ); + + req.on("error", (err) => { + reject(err); + }); + + req.end(); + }); + + t.equal( + response, + "Zen has blocked a path traversal attack originating from url.path." + ); + server.close(); + resolve(); + }); + }); + } +); + t.test("rate limiting works with url encoded paths", async (t) => { const server = http.createServer((req, res) => { const result = shouldBlockRequest(); diff --git a/library/sources/http-server/checkIfRequestIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts index 6d30dd2df..1ce6b083c 100644 --- a/library/sources/http-server/checkIfRequestIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -4,6 +4,10 @@ import { Agent } from "../../agent/Agent"; import { getContext } from "../../agent/Context"; import { escapeHTML } from "../../helpers/escapeHTML"; import { ipAllowedToAccessRoute } from "./ipAllowedToAccessRoute"; +import { checkUrlPathForPathTraversal } from "../../vulnerabilities/path-traversal/checkUrlPathForPathTraversal"; +import { cleanupStackTrace } from "../../helpers/cleanupStackTrace"; +import { getLibraryRoot } from "../../helpers/getLibraryRoot"; +import { attackKindHumanName } from "../../agent/Attack"; const checkedBlocks = Symbol("__zen_checked_blocks__"); @@ -147,5 +151,35 @@ export function checkIfRequestIsBlocked( return true; } + // Only checks the url path for path traversal attacks, other inputs are checked using the sinks + const pathTraversalResult = checkUrlPathForPathTraversal(context.url); + if (pathTraversalResult && pathTraversalResult.found) { + const stackTraceError = new Error(); + + agent.onDetectedAttack({ + module: "http", + operation: "", + kind: "path_traversal", + source: "url", + blocked: agent.shouldBlock(), + stack: cleanupStackTrace(stackTraceError.stack!, getLibraryRoot()), + paths: [".path"], + metadata: {}, + request: context, + payload: pathTraversalResult.payload, + }); + + if (agent.shouldBlock()) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); + + res.end( + `Zen has blocked ${attackKindHumanName("path_traversal")} originating from url.path.` + ); + + return true; + } + } + return false; } diff --git a/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.test.ts b/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.test.ts new file mode 100644 index 000000000..7c452dc3d --- /dev/null +++ b/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.test.ts @@ -0,0 +1,103 @@ +import * as t from "tap"; +import { checkUrlPathForPathTraversal } from "./checkUrlPathForPathTraversal"; + +t.test("it does not detect", async (t) => { + t.equal(checkUrlPathForPathTraversal("").found, false); + t.equal(checkUrlPathForPathTraversal("").payload, undefined); + t.equal(checkUrlPathForPathTraversal("abc").found, false); + t.equal(checkUrlPathForPathTraversal("/").found, false); + t.equal(checkUrlPathForPathTraversal("/abc").found, false); + t.equal(checkUrlPathForPathTraversal("/a?test=123").found, false); + + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource").found, + false + ); + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource/").found, + false + ); + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource/123") + .found, + false + ); + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource/123/456") + .found, + false + ); + t.equal( + checkUrlPathForPathTraversal( + "https://example.com/path/to/resource/123/456/789" + ).found, + false + ); + t.equal( + checkUrlPathForPathTraversal( + "https://example.com/path/to/resource/123/456/789?query=string" + ).found, + false + ); + t.equal( + checkUrlPathForPathTraversal( + "https://example.com/path/to/resource/123/456/789#fragment" + ).found, + false + ); + t.equal( + checkUrlPathForPathTraversal( + "https://example.com/path/to/resource/123/456/789?query=string#fragment" + ).found, + false + ); + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource/%C3%A4") + .found, + false + ); + + // Invalid url encoded characters + t.equal( + checkUrlPathForPathTraversal("https://example.com/path/to/resource/%a") + .found, + false + ); +}); + +t.test("only detect in path segments", async (t) => { + t.equal(checkUrlPathForPathTraversal("/test#/../").found, false); + t.equal(checkUrlPathForPathTraversal("/test?param=/../etc").found, false); // Query parameters are checked differently +}); + +t.test("it detects", async (t) => { + t.equal(checkUrlPathForPathTraversal("/..").found, true); + t.equal(checkUrlPathForPathTraversal("/../").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/..").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/..").payload, "/abc/.."); + t.equal(checkUrlPathForPathTraversal("/abc/../def").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/def/..").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/def/../ghi").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/def/../../ghi").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/def/../../../ghi").found, true); + + // With backslashes + t.equal(checkUrlPathForPathTraversal("/..\\").found, true); + t.equal(checkUrlPathForPathTraversal("/..\\\\").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/..\\").found, true); + + // With URL encoding + t.equal(checkUrlPathForPathTraversal("/%2E%2E").found, true); + t.equal(checkUrlPathForPathTraversal("/%2E%2E/").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/%2E%2E").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/%2E%2E").payload, "/abc/%2E%2E"); + + // With space characters + t.equal(checkUrlPathForPathTraversal("/.%09./").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/.%0a./def").found, true); + t.equal(checkUrlPathForPathTraversal("/abc/.%0D./def").found, true); + t.equal( + checkUrlPathForPathTraversal("/abc/.%0D./def").payload, + "/abc/.%0D./def" + ); +}); diff --git a/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.ts b/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.ts new file mode 100644 index 000000000..190e49072 --- /dev/null +++ b/library/vulnerabilities/path-traversal/checkUrlPathForPathTraversal.ts @@ -0,0 +1,65 @@ +import { getRawUrlPath } from "../../helpers/getRawUrlPath"; +import { safeDecodeURIComponent } from "../../helpers/safeDecodeURIComponent"; +import { normalizeLikeURLConstructor } from "./normalizeLikeURLConstructor"; + +const forbiddenPattern = /(?:^|[\\/])\.\.(?:[\\/]|$)/; + +/** + * Check if the URL path contains a path traversal attack + * A legitimate URL path should not contain ".." or "/../" or "\..\" or "\..\" + * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 + */ +export function checkUrlPathForPathTraversal(url: string | undefined): { + found: boolean; + payload?: string; +} { + if (!url || url.length < 3) { + // Performance optimization, url most include at least 3 characters (/..) + return { + found: false, + }; + } + + const rawPath = getRawUrlPath(url); + if (rawPath.length < 3) { + // Performance optimization, we don't need to check for path traversal if the path is less than 3 characters + // Can happen if the URL has a query string + return { + found: false, + }; + } + + if (forbiddenPattern.test(rawPath)) { + return { + found: true, + payload: rawPath, + }; + } + + if (!url.includes("%")) { + // Performance optimization, if the URL does not contain any encoded characters, we don't need to decode it + return { + found: false, + }; + } + + // Also check encoded paths + const decodedPath = safeDecodeURIComponent(rawPath); + if (!decodedPath) { + return { + found: false, + }; + } + + const normalizedDecodedPath = normalizeLikeURLConstructor(decodedPath); + if (forbiddenPattern.test(normalizedDecodedPath)) { + return { + found: true, + payload: rawPath, + }; + } + + return { + found: false, + }; +} diff --git a/library/vulnerabilities/path-traversal/normalizeLikeURLConstructor.ts b/library/vulnerabilities/path-traversal/normalizeLikeURLConstructor.ts new file mode 100644 index 000000000..ab46b3689 --- /dev/null +++ b/library/vulnerabilities/path-traversal/normalizeLikeURLConstructor.ts @@ -0,0 +1,14 @@ +/** + * This function is used for urls, because they can contain a TAB, carriage return or line feed that is silently removed by the URL constructor. + * + * The WHATWG URL spec defines the following: + * - Remove all ASCII tab or newline from input. + * - An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR. + * + * Also, backslashes are converted to forward slashes by the URL constructor. + * + * See https://url.spec.whatwg.org/#url-parsing + */ +export function normalizeLikeURLConstructor(url: string): string { + return url.replace(/[\t\n\r]/g, "").replace(/\\/g, "/"); +}