Skip to content

Commit

Permalink
rebase on top of scikit-learn#4347
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorstephens committed Jun 4, 2015
1 parent cabd2ca commit 0166186
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
6 changes: 5 additions & 1 deletion doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ Enhancements
option, which has a simpler formula and interpretation.
By Hanna Wallach and `Andreas Müller`_.

- Add ``class_weight`` parameter to automatically weight samples by class
frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
`Trevor Stephens`_.

- Added backlinks from the API reference pages to the user guide. By
`Andreas Müller`_.

Expand Down Expand Up @@ -572,7 +576,7 @@ API changes summary

- The ``shuffle`` option of :class:`.linear_model.SGDClassifier`,
:class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
:class:`linear_model.PassiveAgressiveClassivier` and
:class:`linear_model.PassiveAggressiveClassifier` and
:class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.

- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
Expand Down
16 changes: 9 additions & 7 deletions sklearn/linear_model/passive_aggressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,15 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
When set to True, reuse the solution of the previous call to fit as
initialization, otherwise, just erase the previous solution.
class_weight : dict, {class_label: weight} or "auto" or None, optional
class_weight : dict, {class_label: weight} or "balanced" or None, optional
Preset for the class_weight fit parameter.
Weights associated with classes. If not given, all classes
are supposed to have weight one.
The "auto" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies.
The "balanced" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``.
Attributes
----------
Expand Down Expand Up @@ -120,10 +121,11 @@ def partial_fit(self, X, y, classes=None):
-------
self : returns an instance of self.
"""
if self.class_weight == 'auto':
raise ValueError("class_weight 'auto' is not supported for "
"partial_fit. In order to use 'auto' weights, "
"use compute_class_weight('auto', classes, y). "
if self.class_weight == 'balanced':
raise ValueError("class_weight 'balanced' is not supported for "
"partial_fit. In order to use 'balanced' "
"weights, use "
"compute_class_weight('balanced', classes, y). "
"In place of y you can us a large enough sample "
"of the full training set target to properly "
"estimate the class frequency distributions. "
Expand Down
16 changes: 8 additions & 8 deletions sklearn/linear_model/tests/test_passive_aggressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ def test_class_weights():
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))


def test_partial_fit_weight_class_auto():
# partial_fit with class_weight='auto' not supported
clf = PassiveAggressiveClassifier(class_weight="auto")
def test_partial_fit_weight_class_balanced():
# partial_fit with class_weight='balanced' not supported
clf = PassiveAggressiveClassifier(class_weight="balanced")
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))


Expand All @@ -160,18 +160,18 @@ def test_equal_class_weight():
clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None)
clf.fit(X2, y2)

# Already balanced, so "auto" weights should have no effect
clf_auto = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
class_weight="auto")
clf_auto.fit(X2, y2)
# Already balanced, so "balanced" weights should have no effect
clf_balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
class_weight="balanced")
clf_balanced.fit(X2, y2)

clf_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
class_weight={0: 0.5, 1: 0.5})
clf_weighted.fit(X2, y2)

# should be similar up to some epsilon due to learning rate schedule
assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
assert_almost_equal(clf.coef_, clf_auto.coef_, decimal=2)
assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)


def test_wrong_class_weight_label():
Expand Down

0 comments on commit 0166186

Please sign in to comment.