diff --git a/AUTHORS.rst b/AUTHORS.rst index 707c77aec04..94efef6aa61 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -83,6 +83,7 @@ Contributors * Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix * Louis Maddox -- better docstrings * Łukasz Langa -- partial support for autodoc +* Lukas Wieg -- JavaScript search improvement * Marco Buttu -- doctest extension (pyversion option) * Mark Ostroth -- semantic HTML contributions * Martin Hans -- autodoc improvements diff --git a/CHANGES.rst b/CHANGES.rst index f0f94fda396..090e38569ff 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -125,6 +125,7 @@ Bugs fixed configuration variable ``doctest_test_doctest_blocks``. * #13885: Coverage builder: Fix TypeError when warning about missing modules. Patch by Damien Ayers. +* #13892: HTML search: fix word exclusion in the search by prefixing words with "-". * #13929: Duplicate equation label warnings now have a new warning sub-type, ``ref.equation``. Patch by Jared Dillard. diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 5a7628a18a2..31beb2cdeba 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -171,14 +171,17 @@ const _orderResultsByScoreThenName = (a, b) => { * Default splitQuery function. Can be overridden in ``sphinx.search`` with a * custom function per language. * - * The regular expression works by splitting the string on consecutive characters - * that are not Unicode letters, numbers, underscores, or emoji characters. - * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + * The `consecutiveLetters` regular expression works by matching consecutive characters + * that are Unicode letters, numbers, underscores, or emoji characters. 
+ * + * The `searchWords` regular expression works by matching a word like structure + * that matches the `consecutiveLetters` with or without a leading hyphen '-' which is + * used to exclude search terms later on. */ if (typeof splitQuery === "undefined") { var splitQuery = (query) => query - .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .split(/(? term); // remove remaining empty strings } @@ -627,15 +630,18 @@ const Search = { // ensure that none of the excluded terms is in the search result if ( - [...excludedTerms].some( - (term) => - terms[term] === file - || titleTerms[term] === file - || (terms[term] || []).includes(file) - || (titleTerms[term] || []).includes(file), - ) + [...excludedTerms].some((excludedTerm) => { + // Both mappings will contain either a single integer or a list of integers. + // Converting them to lists makes the comparison more readable. + let excludedTermFiles = [].concat(terms[excludedTerm]); + let excludedTitleFiles = [].concat(titleTerms[excludedTerm]); + return ( + excludedTermFiles.includes(file) + || excludedTitleFiles.includes(file) + ); + }) ) - break; + continue; // select one (max) score for the file. 
const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w))); diff --git a/tests/js/fixtures/search_exclusion/searchindex.js b/tests/js/fixtures/search_exclusion/searchindex.js new file mode 100644 index 00000000000..8e21d1001d2 --- /dev/null +++ b/tests/js/fixtures/search_exclusion/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles":{"Excluded Page":[[0,null]],"Main Page":[[1,null]]},"docnames":["excluded","index"],"envversion":{"sphinx":66,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["excluded.rst","index.rst"],"indexentries":{"excluded":[[1,"index-0",false]]},"objects":{},"objnames":{},"objtypes":{},"terms":{"A":1,"This":[0,1],"can":1,"check":1,"document":1,"exclud":1,"filter":1,"fixtur":1,"hypen":1,"includ":1,"penguin":0,"project":1,"queri":1,"result":1,"search":1,"search_exclus":1,"second":1,"special":0,"specifi":1,"start":1,"term":1,"test":1,"use":1,"will":1,"word":0},"titles":["Excluded Page","Main Page"],"titleterms":{"exclud":0,"main":1,"page":[0,1]}}) \ No newline at end of file diff --git a/tests/js/fixtures/search_multiple_exclusions/searchindex.js b/tests/js/fixtures/search_multiple_exclusions/searchindex.js new file mode 100644 index 00000000000..73e4a7c1446 --- /dev/null +++ b/tests/js/fixtures/search_multiple_exclusions/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles":{"First Page":[[0,null]],"Main Page":[[1,null]],"Second 
Page":[[2,null]]},"docnames":["first","index","second"],"envversion":{"sphinx":66,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["first.rst","index.rst","second.rst"],"indexentries":{"first":[[1,"index-0",false]],"second":[[1,"index-1",false]]},"objects":{},"objnames":{},"objtypes":{},"terms":{"A":1,"This":[0,1,2],"can":1,"check":1,"document":1,"exclud":1,"filter":1,"first":1,"fixtur":1,"hypen":1,"includ":1,"jumanji":2,"penguin":0,"project":1,"queri":1,"result":1,"search":1,"search_exclus":1,"second":1,"special":[0,2],"specifi":1,"start":1,"term":1,"test":1,"use":1,"will":1,"word":[0,2]},"titles":["First Page","Main Page","Second Page"],"titleterms":{"first":0,"main":1,"page":[0,1,2],"second":2}}) \ No newline at end of file diff --git a/tests/js/roots/search_exclusion/conf.py b/tests/js/roots/search_exclusion/conf.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/js/roots/search_exclusion/excluded.rst b/tests/js/roots/search_exclusion/excluded.rst new file mode 100644 index 00000000000..62f8485153b --- /dev/null +++ b/tests/js/roots/search_exclusion/excluded.rst @@ -0,0 +1,4 @@ +Excluded Page +============= + +This is a page with the special word penguin. diff --git a/tests/js/roots/search_exclusion/index.rst b/tests/js/roots/search_exclusion/index.rst new file mode 100644 index 00000000000..29f3a13012d --- /dev/null +++ b/tests/js/roots/search_exclusion/index.rst @@ -0,0 +1,12 @@ +Main Page +========= + +This is the main page of the ``search_exclusion`` test project. + +This document is used as a test fixture to check that search results can be +filtered in the query by specifying excluded terms. + +A term which starts with a hypen will be used as excluded term. 
+ +Include a second page which can be excluded in the search: +:index:`excluded` diff --git a/tests/js/roots/search_multiple_exclusions/conf.py b/tests/js/roots/search_multiple_exclusions/conf.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/js/roots/search_multiple_exclusions/conf.py @@ -0,0 +1 @@ + diff --git a/tests/js/roots/search_multiple_exclusions/first.rst b/tests/js/roots/search_multiple_exclusions/first.rst new file mode 100644 index 00000000000..8f7a81860f2 --- /dev/null +++ b/tests/js/roots/search_multiple_exclusions/first.rst @@ -0,0 +1,4 @@ +First Page +============= + +This is a page with the special word penguin. diff --git a/tests/js/roots/search_multiple_exclusions/index.rst b/tests/js/roots/search_multiple_exclusions/index.rst new file mode 100644 index 00000000000..71f011a7292 --- /dev/null +++ b/tests/js/roots/search_multiple_exclusions/index.rst @@ -0,0 +1,13 @@ +Main Page +========= + +This is the main page of the ``search_exclusion`` test project. + +This document is used as a test fixture to check that search results can be +filtered in the query by specifying excluded terms. + +A term which starts with a hypen will be used as excluded term. + +Include pages which can be excluded in the search: +:index:`first` +:index:`second` \ No newline at end of file diff --git a/tests/js/roots/search_multiple_exclusions/second.rst b/tests/js/roots/search_multiple_exclusions/second.rst new file mode 100644 index 00000000000..fa7c2ff2f28 --- /dev/null +++ b/tests/js/roots/search_multiple_exclusions/second.rst @@ -0,0 +1,4 @@ +Second Page +============= + +This is a page with the special word jumanji. 
diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index d00689c907c..07b14dfc9f4 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -66,6 +66,68 @@ describe("Basic html theme search", function () { expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); }); + it("should find results when a excluded term is used", function () { + eval(loadFixture("search_exclusion/searchindex.js")); + + // It's important that the searchterm is included in multiple pages while the + // excluded term is not included in all pages. + // In this case the ``page`` is included in the two existing pages while the ``penguin`` + // is only included in one page. + [_searchQuery, searchterms, excluded, ..._remainingItems] = + Search._parseQuery("page -penguin"); + + // prettier-ignore + hits = [[ + 'index', + 'Main Page', + '', + null, + 15, + 'index.rst', + 'text' + ]]; + + expect(excluded).toEqual(new Set(["penguin"])); + expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); + }); + + it("should exclude results where the file index is above 0.", function () { + // This is a very constructed test case for fixing the issue that + // `(terms[term] || []).includes(file)` raises an error when `terms[term]` is an + // int above 0. 
The condition would be converted to `(1).includes(3)` which + // results in this type error: + // TypeError: (terms[term] || []).includes is not a function + eval(loadFixture("search_multiple_exclusions/searchindex.js")); + + [_searchQuery, searchterms, excluded, ..._remainingItems] = + Search._parseQuery("page -jumanji"); + + // prettier-ignore + hits = [ + [ + 'first', + 'First Page', + '', + null, + 15, + 'first.rst', + 'text' + ], + [ + 'index', + 'Main Page', + '', + null, + 15, + 'index.rst', + 'text' + ] + ]; + + expect(excluded).toEqual(new Set(["jumanji"])); + expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits); + }); + it('should partially-match "sphinx" when in title index', function () { eval(loadFixture("partial/searchindex.js")); @@ -295,6 +357,16 @@ describe("splitQuery regression tests", () => { expect(parts).toEqual(["Pin", "Code"]); }); + it("can keep underscores in words", () => { + const parts = splitQuery("python_function"); + expect(parts).toEqual(["python_function"]); + }); + + it("can maintain negated search words", () => { + const parts = splitQuery("Pin -Code"); + expect(parts).toEqual(["Pin", "-Code"]); + }); + it("can split Chinese characters", () => { const parts = splitQuery("Hello from 中国 上海"); expect(parts).toEqual(["Hello", "from", "中国", "上海"]);