Skip to content

Commit

Permalink
ensure lists in the search index are also deterministic
Browse files (browse the repository at this point in the history)
  • Loading branch information
pietroalbini committed Sep 1, 2023
1 parent b8a1de6 commit 80b71f7
Showing 1 changed file with 26 additions and 12 deletions.
38 changes: 26 additions & 12 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,19 +308,33 @@ def test_parallel(app):

@pytest.mark.sphinx(testroot='search')
def test_search_index_is_deterministic(app):
app.builder.build_all()
index = load_searchindex(app.outdir / 'searchindex.js')

def check_if_dicts_are_sorted(item):
LISTS_NOT_TO_SORT = [
# Each element of .titles corresponds to the element of .docnames at the same position.
".titles",
# Each element of .filenames corresponds to the element of .docnames at the same position.
".filenames",
]

# In the search index, each title inside .alltitles is stored as a
# (document_idx, title_anchor) tuple. Tuples are represented as lists in JSON,
# but their contents must not be sorted. They could not be sorted anyway:
# document_idx is an int and title_anchor is a str, and comparing an int
# with a str raises TypeError.
def is_title_tuple_type(item):
return len(item) == 2 and isinstance(item[0], int) and isinstance(item[1], str)

def assert_is_sorted(item, path):
print('visiting', path if path else '<root>')
if isinstance(item, dict):
assert list(item.keys()) == sorted(item.keys())
for child in item.values():
check_if_dicts_are_sorted(child)
# Lists in the search index cannot be sorted: for some lists, the position
# of each element is referenced elsewhere in the index, so sorting those
# lists would break the search index.
for key, value in item.items():
assert_is_sorted(value, f'{path}.{key}')
elif isinstance(item, list):
for child in item:
check_if_dicts_are_sorted(child)
if not is_title_tuple_type(item) and path not in LISTS_NOT_TO_SORT:
assert item == sorted(item)
for i, child in enumerate(item):
assert_is_sorted(child, f'{path}[{i}]')

check_if_dicts_are_sorted(index)
app.builder.build_all()
index = load_searchindex(app.outdir / 'searchindex.js')
print(f'index contents:\n{json.dumps(index, indent=2)}\n') # Pretty print the index.
assert_is_sorted(index, '')

0 comments on commit 80b71f7

Please sign in to comment.