sphinx-doc · cglukas · Sep 13, 2025 · Sep 14, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -83,6 +83,7 @@ Contributors
 * Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix
 * Louis Maddox -- better docstrings
 * Łukasz Langa -- partial support for autodoc
+* Lukas Wieg -- JavaScript search improvement
 * Marco Buttu -- doctest extension (pyversion option)
 * Mark Ostroth -- semantic HTML contributions
 * Martin Hans -- autodoc improvements

diff --git a/CHANGES.rst b/CHANGES.rst
@@ -125,6 +125,7 @@ Bugs fixed
   configuration variable ``doctest_test_doctest_blocks``.
 * #13885: Coverage builder: Fix TypeError when warning about missing modules.
   Patch by Damien Ayers.
+* #13892: HTML search: fix exclusion of search terms that are prefixed with "-".
 * #13929: Duplicate equation label warnings now have a new warning
   sub-type, ``ref.equation``.
   Patch by Jared Dillard.

diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js
@@ -171,14 +171,17 @@ const _orderResultsByScoreThenName = (a, b) => {
  * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
  * custom function per language.
  *
- * The regular expression works by splitting the string on consecutive characters
- * that are not Unicode letters, numbers, underscores, or emoji characters.
- * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
+ * The regular expression splits the query on runs of characters that are not
+ * Unicode letters, numbers, hyphens, underscores, or emoji characters.
+ *
+ * It additionally splits on every hyphen that is not directly preceded by
+ * whitespace, so a hyphen is only kept when it follows whitespace (for example
+ * ``-Code`` in ``Pin -Code``); such a hyphen is used to exclude search terms later on.
  */
 if (typeof splitQuery === "undefined") {
   var splitQuery = (query) =>
     query
-      .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
+      .split(/(?<!\s)[-]|[^\p{Letter}\p{Number}\-_\p{Emoji_Presentation}]+/gu)
       .filter((term) => term); // remove remaining empty strings
 }
 
@@ -627,15 +630,18 @@ const Search = {
 
       // ensure that none of the excluded terms is in the search result
       if (
-        [...excludedTerms].some(
-          (term) =>
-            terms[term] === file
-            || titleTerms[term] === file
-            || (terms[term] || []).includes(file)
-            || (titleTerms[term] || []).includes(file),
-        )
+        [...excludedTerms].some((excludedTerm) => {
+          // Both mappings will contain either a single integer or a list of integers.
+          // Converting them to lists makes the comparison more readable.
+          let excludedTermFiles = [].concat(terms[excludedTerm]);
+          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
+          return (
+            excludedTermFiles.includes(file)
+            || excludedTitleFiles.includes(file)
+          );
+        })
-        [...excludedTerms].some((excludedTerm) => {
-          // Both mappings will contain either a single integer or a list of integers.
-          // Converting them to lists makes the comparison more readable.
-          let excludedTermFiles = [].concat(terms[excludedTerm]);
-          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
-          return (
-            excludedTermFiles.includes(file)
-            || excludedTitleFiles.includes(file)
-          );
-        })
+        [...excludedTerms].some(
+          (term) =>
+            terms[term] === file
+            || titleTerms[term] === file
+            || (terms[term] || []).includes(file)
+            || (titleTerms[term] || []).includes(file),
+        )
-        [...excludedTerms].some((excludedTerm) => {
-          // Both mappings will contain either a single integer or a list of integers.
-          // Converting them to lists makes the comparison more readable.
-          let excludedTermFiles = [].concat(terms[excludedTerm]);
-          let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
-          return (
-            excludedTermFiles.includes(file)
-            || excludedTitleFiles.includes(file)
-          );
-        })
+        [...excludedTerms].some(
+          (term) =>
+            terms[term] === file
+            || titleTerms[term] === file
+            || (terms[term] || []).includes(file)
+            || (titleTerms[term] || []).includes(file),
+        )
       )
-        break;
+        continue;
 
       // select one (max) score for the file.
       const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w)));

diff --git a/tests/js/fixtures/search_exclusion/searchindex.js b/tests/js/fixtures/search_exclusion/searchindex.js
diff --git a/tests/js/fixtures/search_multiple_exclusions/searchindex.js b/tests/js/fixtures/search_multiple_exclusions/searchindex.js
diff --git a/tests/js/roots/search_exclusion/conf.py b/tests/js/roots/search_exclusion/conf.py
diff --git a/tests/js/roots/search_exclusion/excluded.rst b/tests/js/roots/search_exclusion/excluded.rst
@@ -0,0 +1,4 @@
+Excluded Page
+=============
+
+This is a page with the special word penguin.
diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst
@@ -0,0 +1,12 @@
+Main Page
+=========
+
+This is the main page of the ``search_exclusion`` test project.
+
+This document is used as a test fixture to check that search results can be
+filtered in the query by specifying excluded terms.
+
+A term which starts with a hyphen will be used as an excluded term.
+
+Include a second page which can be excluded in the search:
+:index:`excluded`
diff --git a/tests/js/roots/search_multiple_exclusions/conf.py b/tests/js/roots/search_multiple_exclusions/conf.py
@@ -0,0 +1 @@
+
diff --git a/tests/js/roots/search_multiple_exclusions/first.rst b/tests/js/roots/search_multiple_exclusions/first.rst
@@ -0,0 +1,4 @@
+First Page
+=============
+
+This is a page with the special word penguin.
diff --git a/tests/js/roots/search_multiple_exclusions/index.rst b/tests/js/roots/search_multiple_exclusions/index.rst
@@ -0,0 +1,13 @@
+Main Page
+=========
+
+This is the main page of the ``search_multiple_exclusions`` test project.
+
+This document is used as a test fixture to check that search results can be
+filtered in the query by specifying excluded terms.
+
+A term which starts with a hyphen will be used as an excluded term.
+
+Include pages which can be excluded in the search:
+:index:`first`
+:index:`second`
diff --git a/tests/js/roots/search_multiple_exclusions/second.rst b/tests/js/roots/search_multiple_exclusions/second.rst
@@ -0,0 +1,4 @@
+Second Page
+=============
+
+This is a page with the special word jumanji.
diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js
@@ -66,6 +66,68 @@ describe("Basic html theme search", function () {
       expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
     });
 
+    it("should find results when a excluded term is used", function () {
+      eval(loadFixture("search_exclusion/searchindex.js"));
+
+      // It's important that the searchterm is included in multiple pages while the
+      // excluded term is not included in all pages.
+      // In this case the ``page`` is included in the two existing pages while the ``penguin``
+      // is only included in one page.
+      [_searchQuery, searchterms, excluded, ..._remainingItems] =
+        Search._parseQuery("page -penguin");
+
+      // prettier-ignore
+      hits = [[
+        'index',
+        'Main Page',
+        '',
+        null,
+        15,
+        'index.rst',
+        'text'
+      ]];
+
+      expect(excluded).toEqual(new Set(["penguin"]));
+      expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
+    });
+
+    it("should exclude results where the file index is above 0.", function () {
+      // This is a deliberately constructed regression test for the issue that
+      // `(terms[term] || []).includes(file)` raises an error when `terms[term]` is an
+      // int above 0. The condition would be converted to `(1).includes(3)`, which
+      // results in this type error:
+      //    TypeError: (terms[term] || []).includes is not a function
+      eval(loadFixture("search_multiple_exclusions/searchindex.js"));
+
+      [_searchQuery, searchterms, excluded, ..._remainingItems] =
+        Search._parseQuery("page -jumanji");
+
+      // prettier-ignore
+      hits = [
+        [
+          'first',
+          'First Page',
+          '',
+          null,
+          15,
+          'first.rst',
+          'text'
+        ],
+        [
+          'index',
+          'Main Page',
+          '',
+          null,
+          15,
+          'index.rst',
+          'text'
+        ]
+      ];
+
+      expect(excluded).toEqual(new Set(["jumanji"]));
+      expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
+    });
+
     it('should partially-match "sphinx" when in title index', function () {
       eval(loadFixture("partial/searchindex.js"));
 
@@ -295,6 +357,16 @@ describe("splitQuery regression tests", () => {
     expect(parts).toEqual(["Pin", "Code"]);
   });
 
+  it("can keep underscores in words", () => {
+    const parts = splitQuery("python_function");
+    expect(parts).toEqual(["python_function"]);
+  });
+
+  it("can maintain negated search words", () => {
+    const parts = splitQuery("Pin -Code");
+    expect(parts).toEqual(["Pin", "-Code"]);
+  });
+
   it("can split Chinese characters", () => {
     const parts = splitQuery("Hello from 中国 上海");
     expect(parts).toEqual(["Hello", "from", "中国", "上海"]);