Skip to content

Commit

Permalink
FEA Add examples recommender system (#1125)
Browse files Browse the repository at this point in the history
Co-authored-by: ArturoAmorQ <arturo.amor-quiroz@polytechnique.edu>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Lucy Liu <jliu176@gmail.com>
  • Loading branch information
7 people committed Nov 17, 2023
1 parent 83f8c3a commit 13b9bef
Show file tree
Hide file tree
Showing 10 changed files with 490 additions and 5 deletions.
2 changes: 1 addition & 1 deletion continuous_integration/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if [ "$DISTRIB" == "mamba" ]; then
if [ "$PLATFORM" != "Linux" ]; then
conda remove -y memory_profiler
fi
PIP_DEPENDENCIES="jupyterlite-sphinx>=0.8.0,<0.9.0 jupyterlite-pyodide-kernel<0.1.0 libarchive-c"
PIP_DEPENDENCIES="jupyterlite-sphinx>=0.8.0,<0.9.0 jupyterlite-pyodide-kernel<0.1.0 libarchive-c numpy"
elif [ "$DISTRIB" == "minimal" ]; then
PIP_DEPENDENCIES=""
elif [ "$DISTRIB" == "pip" ]; then
Expand Down
52 changes: 52 additions & 0 deletions doc/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ file:
- ``reset_modules_order`` (:ref:`reset_modules_order`)
- ``abort_on_example_error`` (:ref:`abort_on_first`)
- ``only_warn_on_example_error`` (:ref:`warning_on_error`)
- ``recommender`` (:ref:`recommend_examples`)
- ``expected_failing_examples`` (:ref:`dont_fail_exit`)
- ``min_reported_time`` (:ref:`min_reported_time`)
- ``show_memory`` (:ref:`show_memory`)
Expand Down Expand Up @@ -1785,6 +1786,57 @@ flag is passed to ``sphinx-build``. This can be enabled by setting::
}


.. _recommend_examples:

Enabling the example recommender system
=======================================

Sphinx-Gallery can be configured to generate content-based recommendations for
an example gallery. A list of related examples is automatically generated by
computing the closest examples in the `TF-IDF
<https://en.wikipedia.org/wiki/Tf%E2%80%93idf>`_ space of their text contents.
Only examples within a single gallery (including its sub-galleries) are used to
compute the closest examples. The most similar content is then displayed at the bottom
of each example as a set of thumbnails.

The recommender system can be enabled by setting ``enable`` to ``True``. To
configure it, pass a dictionary under the ``recommender`` key of
``sphinx_gallery_conf``, e.g.::

sphinx_gallery_conf = {
...
"recommender": {"enable": True, "n_examples": 5, "min_df": 3, "max_df": 0.9},
}

The only necessary parameter is ``enable``. If any other parameter is not
specified, the default value is used. Below is a more complete explanation of
each field:

enable (type: bool, default: False)
Whether to generate recommendations inside the example gallery. Enabling this
feature requires adding ``numpy`` to the dependencies.
n_examples (type: int, default: 5)
Number of most relevant examples to display.
min_df (type: float in range [0.0, 1.0] | int, default: 3)
When building the vocabulary ignore terms that have a document frequency
strictly lower than the given threshold. If float, the parameter represents a
proportion of documents; if integer, it represents an absolute count. This value is
also called cut-off in the literature.
max_df (type: float in range [0.0, 1.0] | int, default: 0.9)
When building the vocabulary ignore terms that have a document frequency
strictly higher than the given threshold. If float, the parameter represents a
proportion of documents; if integer, it represents an absolute count.
rubric_header (type: str, default: "Related examples")
Customizable rubric header. It can be edited to more descriptive text or to
add external links, e.g. to the API doc of the recommender system on the
sphinx-gallery documentation.

The parameters ``min_df`` and ``max_df`` can be customized by the user to trim
the very rare/very common words. This may improve the recommendations quality,
but more importantly, it spares some computation resources that would be wasted
on non-informative tokens.

Currently example recommendations are only computed for ``.py`` files.

.. _setting_thumbnail_size:

Setting gallery thumbnail size
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
with open("requirements.txt") as fid:
install_requires = [line.strip() for line in fid if line.strip()]

extras_require = {"recommender": ["numpy"]}

setup(
name="sphinx-gallery",
description=description, # noqa: E501, analysis:ignore
Expand All @@ -55,6 +57,7 @@
author="Óscar Nájera",
author_email="najera.oscar@gmail.com",
install_requires=install_requires,
extras_require=extras_require,
python_requires=">=3.8",
license="3-clause BSD",
classifiers=[
Expand Down
39 changes: 39 additions & 0 deletions sphinx_gallery/gen_gallery.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import pathlib
from xml.sax.saxutils import quoteattr, escape
from itertools import chain

from sphinx.errors import ConfigError, ExtensionError
import sphinx.util
Expand All @@ -39,6 +40,7 @@
from .interactive_example import post_configure_jupyterlite_sphinx
from .interactive_example import create_jupyterlite_contents
from .directives import MiniGallery, ImageSg, imagesg_addnode
from .recommender import ExampleRecommender, _write_recommendations

_KNOWN_CSS = (
"sg_gallery",
Expand Down Expand Up @@ -85,6 +87,7 @@ def __call__(self, gallery_conf, script_vars):
"download_all_examples": True,
"abort_on_example_error": False,
"only_warn_on_example_error": False,
"recommender": {"enable": False},
"failing_examples": {},
"passing_examples": [],
"stale_examples": [], # ones that did not need to be run due to md5sum
Expand Down Expand Up @@ -663,6 +666,42 @@ def generate_gallery_rst(app):
costs += subsection_costs
write_computation_times(gallery_conf, target_dir, subsection_costs)

# Build recommendation system
if gallery_conf["recommender"]["enable"]:
try:
import numpy as np # noqa: F401
except ImportError:
raise ConfigError("gallery_conf['recommender'] requires numpy")

recommender_params = copy.deepcopy(gallery_conf["recommender"])
recommender_params.pop("enable")
recommender_params.pop("rubric_header", None)
recommender = ExampleRecommender(**recommender_params)

gallery_py_files = []
# root and subsection directories containing python examples
gallery_directories = [gallery_dir_abs_path] + subsecs
for current_dir in gallery_directories:
src_dir = os.path.join(gallery_dir_abs_path, current_dir)
# sort python files to have a deterministic input across call
py_files = sorted(
[
fname
for fname in Path(src_dir).iterdir()
if fname.suffix == ".py"
],
key=gallery_conf["within_subsection_order"](src_dir),
)
gallery_py_files.append(
[os.path.join(src_dir, fname) for fname in py_files]
)
# flatten the list of list
gallery_py_files = list(chain.from_iterable(gallery_py_files))

recommender.fit(gallery_py_files)
for fname in gallery_py_files:
_write_recommendations(recommender, fname, gallery_conf)

# generate toctree with subsections
if gallery_conf["nested_sections"] is True:
subsections_toctree = _format_toctree(
Expand Down
9 changes: 9 additions & 0 deletions sphinx_gallery/gen_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ def __exit__(self, type_, value, tb):
:download:`Download Jupyter notebook: {0} <{0}>`
"""

RECOMMENDATIONS_INCLUDE = """\n
.. include:: {0}.recommendations
"""


def codestr2rst(codestr, lang="python", lineno=None):
"""Return reStructuredText code block from code string."""
Expand Down Expand Up @@ -1467,6 +1471,11 @@ def save_rst_example(

example_rst += CODE_DOWNLOAD.format(example_file.name, language)

if gallery_conf["recommender"]["enable"]:
# extract the filename without the extension
recommend_fname = Path(example_fname).stem
example_rst += RECOMMENDATIONS_INCLUDE.format(recommend_fname)

if gallery_conf["show_signature"]:
example_rst += SPHX_GLR_SIG

Expand Down

0 comments on commit 13b9bef

Please sign in to comment.