Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ordering inside searchindex.js not being deterministic #11665

Merged
merged 3 commits into from
Sep 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Bugs fixed

* #11668: Raise a useful error when ``theme.conf`` is missing.
Patch by Vinay Sajip.
* #11622: Ensure that the order of keys in ``searchindex.js`` is deterministic.
Patch by Pietro Albini.

Testing
-------
Expand Down
2 changes: 1 addition & 1 deletion sphinx/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class _JavaScriptIndex:
SUFFIX = ')'

def dumps(self, data: Any) -> str:
return self.PREFIX + json.dumps(data) + self.SUFFIX
return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX

def loads(self, s: str) -> Any:
data = s[len(self.PREFIX):-len(self.SUFFIX)]
Expand Down
37 changes: 37 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,40 @@ def test_parallel(app):
app.build()
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']


@pytest.mark.sphinx(testroot='search')
def test_search_index_is_deterministic(app):
lists_not_to_sort = {
# Each element of .titles is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.titles',
# Each element of .filenames is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.filenames',
}

# In the search index, titles inside .alltitles are stored as a tuple of
# (document_idx, title_anchor). Tuples are represented as lists in JSON,
# but their contents must not be sorted. We cannot sort them anyway, as
# document_idx is an int and title_anchor is a str.
def is_title_tuple_type(item):
return len(item) == 2 and isinstance(item[0], int) and isinstance(item[1], str)

def assert_is_sorted(item, path):
err_path = path if path else '<root>'
if isinstance(item, dict):
assert list(item.keys()) == sorted(item.keys()), f'{err_path} is not sorted'
for key, value in item.items():
assert_is_sorted(value, f'{path}.{key}')
elif isinstance(item, list):
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
assert item == sorted(item), f'{err_path} is not sorted'
for i, child in enumerate(item):
assert_is_sorted(child, f'{path}[{i}]')

app.builder.build_all()
index = load_searchindex(app.outdir / 'searchindex.js')
# Pretty print the index. Only shown by pytest on failure.
print(f'searchindex.js contents:\n\n{json.dumps(index, indent=2)}')
assert_is_sorted(index, '')