Skip to content

Commit

Permalink
ENH Add interaction constraint shortcuts to HistGradientBoosting* (#2…
Browse files Browse the repository at this point in the history
…4849)

Co-authored-by: jeremie du boisberranger <jeremiedbb@yahoo.fr>
  • Loading branch information
betatim and jeremiedbb committed Nov 25, 2022
1 parent f76ea1b commit df14322
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
6 changes: 6 additions & 0 deletions doc/whats_new/v1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,12 @@ Changelog
as value to specify monotonicity constraints for each feature.
:pr:`24855` by :user:`Olivier Grisel <ogrisel>`.

- |Enhancement| Interaction constraints for
:class:`ensemble.HistGradientBoostingClassifier`
and :class:`ensemble.HistGradientBoostingRegressor` can now be specified
as strings for two common cases: "no_interactions" and "pairwise" interactions.
:pr:`24849` by :user:`Tim Head <betatim>`.

- |Fix| Fixed the issue where :class:`ensemble.AdaBoostClassifier` outputs
NaN in feature importance when fitted with very small sample weight.
:pr:`20415` by :user:`Zhehao Liu <MaxwellLZH>`.
Expand Down
4 changes: 1 addition & 3 deletions examples/inspection/plot_partial_dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,7 @@

print("Training interaction constraint HistGradientBoostingRegressor...")
tic = time()
est_no_interactions = HistGradientBoostingRegressor(
interaction_cst=[[i] for i in range(X_train.shape[1])]
)
est_no_interactions = HistGradientBoostingRegressor(interaction_cst="no_interactions")
est_no_interactions.fit(X_train, y_train)
print(f"done in {time() - tic:.3f}s")

Expand Down
29 changes: 25 additions & 4 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from abc import ABC, abstractmethod
from functools import partial
import itertools
from numbers import Real, Integral
import warnings

Expand Down Expand Up @@ -92,7 +93,12 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
"min_samples_leaf": [Interval(Integral, 1, None, closed="left")],
"l2_regularization": [Interval(Real, 0, None, closed="left")],
"monotonic_cst": ["array-like", dict, None],
"interaction_cst": [list, tuple, None],
"interaction_cst": [
list,
tuple,
StrOptions({"pairwise", "no_interactions"}),
None,
],
"n_iter_no_change": [Interval(Integral, 1, None, closed="left")],
"validation_fraction": [
Interval(Real, 0, 1, closed="neither"),
Expand Down Expand Up @@ -288,8 +294,15 @@ def _check_interaction_cst(self, n_features):
if self.interaction_cst is None:
return None

if self.interaction_cst == "no_interactions":
interaction_cst = [[i] for i in range(n_features)]
elif self.interaction_cst == "pairwise":
interaction_cst = itertools.combinations(range(n_features), 2)
else:
interaction_cst = self.interaction_cst

try:
constraints = [set(group) for group in self.interaction_cst]
constraints = [set(group) for group in interaction_cst]
except TypeError:
raise ValueError(
"Interaction constraints must be a sequence of tuples or lists, got:"
Expand Down Expand Up @@ -1275,7 +1288,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
.. versionchanged:: 1.2
Accept dict of constraints with feature names as keys.
interaction_cst : sequence of lists/tuples/sets of int, default=None
interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \
of int, default=None
Specify interaction constraints, the sets of features which can
interact with each other in child node splits.
Expand All @@ -1284,6 +1298,9 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
specified in these constraints, they are treated as if they were
specified as an additional set.
The strings "pairwise" and "no_interactions" are shorthands for
allowing only pairwise or no interactions, respectively.
For instance, with 5 features in total, `interaction_cst=[{0, 1}]`
is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,
and specifies that each branch of a tree will either only split
Expand Down Expand Up @@ -1623,7 +1640,8 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
.. versionchanged:: 1.2
Accept dict of constraints with feature names as keys.
interaction_cst : sequence of lists/tuples/sets of int, default=None
interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \
of int, default=None
Specify interaction constraints, the sets of features which can
interact with each other in child node splits.
Expand All @@ -1632,6 +1650,9 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
specified in these constraints, they are treated as if they were
specified as an additional set.
The strings "pairwise" and "no_interactions" are shorthands for
allowing only pairwise or no interactions, respectively.
For instance, with 5 features in total, `interaction_cst=[{0, 1}]`
is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,
and specifies that each branch of a tree will either only split
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,10 @@ def test_uint8_predict(Est):
[
(None, 931, None),
([{0, 1}], 2, [{0, 1}]),
("pairwise", 2, [{0, 1}]),
("pairwise", 4, [{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}]),
("no_interactions", 2, [{0}, {1}]),
("no_interactions", 4, [{0}, {1}, {2}, {3}]),
([(1, 0), [5, 1]], 6, [{0, 1}, {1, 5}, {2, 3, 4}]),
],
)
Expand Down

0 comments on commit df14322

Please sign in to comment.