From b53ba4b92d1d11cfdce124f861323b7ec2f30309 Mon Sep 17 00:00:00 2001 From: Jake Vanderplas Date: Thu, 28 Jul 2011 19:38:07 -0700 Subject: [PATCH 01/51] manifold doc update --- doc/modules/manifold.rst | 70 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 7af6b432ff5a8..e863f56a74eba 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -25,14 +25,49 @@ Algorithms for this task are based on the idea that the dimensionality of many data sets is only artificially high. +.. figure:: ../auto_examples/manifold/images/plot_compare_methods.png + :target: ../auto_examples/manifold/plot_compare_methods.html + :align: center + Locally Linear Embedding ------------------------- +======================== + +Locally linear embedding (LLE) seeks a lower-dimensional projection of the data +which preserves distances within local neighborhoods. It can be thought +of as a series of local *Principal Component* analyses which are optimized +to find the best nonlinear embedding. Locally linear embedding can be performed with function :func:`locally_linear_embedding` or its object-oriented counterpart :class:`LocallyLinearEmbedding`. +``scikits.learn`` contains both standard LLE and several variants, which +can be accessed via the ``method`` keyword in either of the above routines. +They are: +* ``method = 'standard'``: Standard LLE. This is the most computationally + efficient of the LLE algorithms, but because of the necessity to + regularize the local weight matrix, often leads to projections which + distort the geometry of the embedded manifold. +* ``method = 'hessian'``: Hessian LLE, also known as Hessian Eigenmapping. + This method addresses some of the distortion problems of standard LLE + by constructing a more complicated *hessian estimator* in each local + neighborhood. The size of the hessian estimator scales as + :math:`O[d^2]`, where :math:`d` is the dimension of the embedded manifold. + additionally, the number of nearest neighbors must be greater than + :math:`d(d+3)/2`. Because of this, hessian LLE is not suitable for + some problems. +* ``method = 'modified'``: Modified LLE. This routine uses multiple weight + vectors within each local neighborhood to solve the regularization problem + of standard LLE. While this adds to the cost of computing the embedding, + it is more efficient than the hessian LLE algorithm. +* ``method = 'ltsa'``: Local Tangent Space Alignment. Though philosophically + distinct from LLE, LTSA is included under LLE because it is algorithmically + very similar to LLE and its variants. LTSA seeks subspaces which are + tangent to the data within local neighborhoods, and then performs a global + analysis to align these spaces. Its compuational cost is similar to that + of Modified LLE. + These take as input a set of points in a high-dimensional space and return those points embedded in a manifold of dimension specified by parameter ``out_dim``. @@ -47,22 +82,43 @@ those points embedded in a manifold of dimension specified by parameter * See :ref:`example_manifold_plot_lle_digits.py` for an example of dimensionality reduction on handwritten digits. - * See :ref:`example_manifold_plot_swissroll.py` for an example of - locally linear embedding on the swiss roll. + * See :ref:`example_manifold_plot_compare_methods.py` for an example of + dimensionality reduction on a toy "S-curve" dataset. 
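As an illustrative aside (not part of the patch itself), the LLE variants
described above are all selected through the ``method`` keyword; a minimal
sketch, using only functions already referenced in this patch series::

    from scikits.learn import manifold, datasets

    # 1000 points sampled from the toy "S-curve" dataset (3 dimensions)
    X, color = datasets.samples_generator.make_s_curve(1000)

    # standard LLE, embedding into out_dim=2 dimensions with 8 neighbors
    Y, err = manifold.locally_linear_embedding(X, 8, 2, method='standard')

    # Hessian LLE requires n_neighbors > out_dim * (out_dim + 3) / 2,
    # i.e. more than 5 neighbors for a 2-dimensional embedding
    Y, err = manifold.locally_linear_embedding(X, 8, 2, method='hessian')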
Complexity ---------- -The complete algorithm scales using the `dense` eigensolver scales as +For standard LLE, the complete algorithm scales using the +`dense` eigensolver scales as :math:`O(N log(N)) + O(D N K^3) + O(d N^2)`, where N is the number of samples, D is the input dimension, d the output dimension and K the number of -neighbors. If the `lobcpg` solver is used, the last term can be reduced to -sub-quadratic in N. +neighbors. If the `lobcpg` or `arpack` solver is used, +the last term can be reduced to sub-quadratic in N. + +Isomap +====== + +Another approach to manifold learning is the Isomap algorithm, short for +Isometric Mapping. Isomap seeks a lower-dimensional embedding which +maintains geodesic distances between all points. + +Isomap can be performed with the function :func:`scikits.learn.manifold.isomap` +or the object :class:`scikits.learn.manifold.Isomap`. + +Complexity +---------- +The optimized isomap algorithm has a full cost of +:math:`O[N log(N)] + O[N^2 (k + log(N))] + O[d N^2]` where the final +cost can be reduced to sub-quadratic through the use of ``ARPACK``. +The main bottleneck is a shortest-path graph search of all the points. +For this, `scikit-learn` uses either *Dijkstra's algorithm* with +*Fibonacci trees*, or the *Floyd-Warshall algorithm*, depending on the +parameters of the problem. Tips on practical use ---------------------- +===================== * Make sure the same scale is used over all features. Being this a nearest-neighbors method it will behave poorly otherwise. From 9abb9fa71030a94d7aaf0af094c7ff43c20df714 Mon Sep 17 00:00:00 2001 From: Jake Vanderplas Date: Mon, 8 Aug 2011 17:39:15 -0700 Subject: [PATCH 02/51] update manifold documentation --- doc/modules/classes.rst | 1 + doc/modules/manifold.rst | 352 ++++++++++++++++++---- examples/manifold/plot_compare_methods.py | 68 ++--- examples/manifold/plot_lle_digits.py | 36 ++- 4 files changed, 352 insertions(+), 105 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 8af8aba094d1e..cc9e23ea77deb 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -552,6 +552,7 @@ Manifold learning :template: class.rst manifold.LocallyLinearEmbedding + manifold.Isomap .. autosummary:: diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index e863f56a74eba..6b145ebdf9db0 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -19,15 +19,133 @@ Manifold learning | -- Baloo's song [The Jungle Book] +.. |s_input_img| image:: ../auto_examples/manifold/images/plot_compare_methods_1.png + :target: ../auto_examples/manifold/plot_compare_methods.html + :scale: 50 + +.. |s_mlle_img| image:: ../auto_examples/manifold/images/plot_compare_methods_5.png + :target: ../auto_examples/manifold/plot_compare_methods.html + :scale: 50 + +.. centered:: |s_input_img| |s_mlle_img| Manifold learning is an approach to nonlinear dimensionality reduction. Algorithms for this task are based on the idea that the dimensionality of many data sets is only artificially high. -.. figure:: ../auto_examples/manifold/images/plot_compare_methods.png - :target: ../auto_examples/manifold/plot_compare_methods.html +Introduction +============ + +High-dimensional datasets can be very difficult to visualize. While data +in two or three dimensions can be plotted to show the inherent +structure of the data, equivalent high-dimensional plots are much less +intuitive. To aid visualization of the structure of a dataset, the +dimension must be reduced in some way. 
+ +The simplest way to accomplish this dimensionality reduction is by taking +a random projection of the data. Though this allows some degree of +visualization of the data structure, the randomness of the choice leaves much +to be desired. In a random projection, it is likely that the more +interesting structure within the data will be lost. + + +.. |digits_img| image:: ../auto_examples/manifold/images/plot_lle_digits_1.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. |projected_img| image:: ../auto_examples/manifold/images/plot_lle_digits_2.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. centered:: |digits_img| |projected_img| + + +To address this concern, a number of supervised and unsupervised linear +dimensionality reduction frameworks have been designed, such as Principal +Component Analysis (PCA), Independent Component Analysis, Linear +Discriminant Analysis, and others. These algorithms define specific +rubrics to choose an "interesting" linear projection of the data. +These methods can be powerful, but often miss important nonlinear +structure in the data. + + +.. |PCA_img| image:: ../auto_examples/manifold/images/plot_lle_digits_3.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. |LDA_img| image:: ../auto_examples/manifold/images/plot_lle_digits_4.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. centered:: |PCA_img| |LDA_img| + +Manifold Learning can be thought of as an attempt to generalize linear +frameworks like PCA to be sensitive to nonlinear structure in data. Though +supervised variants exist, the typical manifold learning problem is +unsupervised: it learns the high-dimensional structure of the data +from the data itself, without the use of predetermined classifications. + + +.. topic:: Examples: + + * See :ref:`example_manifold_plot_lle_digits.py` for an example of + dimensionality reduction on handwritten digits. + + * See :ref:`example_manifold_plot_compare_methods.py` for an example of + dimensionality reduction on a toy "S-curve" dataset. + +The manifold learning implementations available in scikits-learn are +summarized below + +Isomap +====== + +One of the earliest approaches to manifold learning is the Isomap +algorithm, short for Isometric Mapping. Isomap can be viewed as an +extension of Multi-dimensional Scaling (MDS) or Kernel PCA. +Isomap seeks a lower-dimensional embedding which maintains geodesic +distances between all points. Isomap can be performed with the object +:class:`Isomap`. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_5.png + :target: ../auto_examples/manifold/plot_lle_digits.html :align: center + :scale: 50 + +Complexity +---------- +The Isomap algorithm comprises three stages: + +1. **Nearest neighbor search.** Isomap uses + :class:`scikits.learn.neighbors.BallTree` for efficient neighbor search. + The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` + nearest neighbors of :math:`N` points in :math:`D` dimensions. + +2. **Shortest-path graph search.** The most efficient known algorithms + for this are *Dijkstra's Algorithm*, which is approximately + :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which + is :math:`O[N^3]`. The algorithm can be selected by the user with + the ``path_method`` keyword of ``Isomap``. If unspecified, the code + attempts to choose the best algorithm for the input data. + +3. 
**Partial eigenvalue decomposition.** The embedding is encoded in the + eigenvectors corresponding to the :math:`d` largest eigenvalues of the + :math:`N \times N` isomap kernel. For a dense solver, the cost is + approximately :math:`O[d N^2]`. This cost can often be improved using + the ``ARPACK`` solver. The eigensolver can be specified by the user + with the ``path_method`` keyword of ``Isomap``. If unspecified, the + code attempts to choose the best algorithm for the input data. + +The overall complexity of Isomap is +:math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. + + +.. topic:: References: + + * `"A global geometric framework for nonlinear dimensionality reduction" + `_ + Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. Science 290 (5500) Locally Linear Embedding @@ -35,93 +153,199 @@ Locally Linear Embedding Locally linear embedding (LLE) seeks a lower-dimensional projection of the data which preserves distances within local neighborhoods. It can be thought -of as a series of local *Principal Component* analyses which are optimized -to find the best nonlinear embedding. +of as a series of local Principal Component Analyses which are globally +compared to find the best nonlinear embedding. Locally linear embedding can be performed with function :func:`locally_linear_embedding` or its object-oriented counterpart :class:`LocallyLinearEmbedding`. -``scikits.learn`` contains both standard LLE and several variants, which -can be accessed via the ``method`` keyword in either of the above routines. -They are: -* ``method = 'standard'``: Standard LLE. This is the most computationally - efficient of the LLE algorithms, but because of the necessity to - regularize the local weight matrix, often leads to projections which - distort the geometry of the embedded manifold. -* ``method = 'hessian'``: Hessian LLE, also known as Hessian Eigenmapping. - This method addresses some of the distortion problems of standard LLE - by constructing a more complicated *hessian estimator* in each local - neighborhood. The size of the hessian estimator scales as - :math:`O[d^2]`, where :math:`d` is the dimension of the embedded manifold. - additionally, the number of nearest neighbors must be greater than - :math:`d(d+3)/2`. Because of this, hessian LLE is not suitable for - some problems. -* ``method = 'modified'``: Modified LLE. This routine uses multiple weight - vectors within each local neighborhood to solve the regularization problem - of standard LLE. While this adds to the cost of computing the embedding, - it is more efficient than the hessian LLE algorithm. -* ``method = 'ltsa'``: Local Tangent Space Alignment. Though philosophically - distinct from LLE, LTSA is included under LLE because it is algorithmically - very similar to LLE and its variants. LTSA seeks subspaces which are - tangent to the data within local neighborhoods, and then performs a global - analysis to align these spaces. Its compuational cost is similar to that - of Modified LLE. - -These take as input a set of points in a high-dimensional space and return -those points embedded in a manifold of dimension specified by parameter -``out_dim``. - -.. figure:: ../auto_examples/manifold/images/plot_lle_digits_3.png +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_6.png :target: ../auto_examples/manifold/plot_lle_digits.html :align: center + :scale: 50 +Complexity +---------- -.. 
topic:: Examples: +The standard LLE algorithm comprises three stages: - * See :ref:`example_manifold_plot_lle_digits.py` for an example of - dimensionality reduction on handwritten digits. +1. **Nearest Neighbors Search**. See discussion under Isomap above. - * See :ref:`example_manifold_plot_compare_methods.py` for an example of - dimensionality reduction on a toy "S-curve" dataset. +2. **Weight Matrix Construction**. :math:`O[D N k^3]`. + The construction of the LLE weight matrix involves the solution of a + :math:`k \times k` linear equation for each of the :math:`N` local + neighborhoods +3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. +The overall complexity of standard LLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. + + +.. topic:: References: + + * `"Nonlinear dimensionality reduction by locally linear embedding" + `_ + Roweis, S. & Saul, L. Science 290:2323 (2000) + + +Modified Locally Linear Embedding +================================= + +One well-known issue with LLE is the regularization problem. When the number +of neighbors is greater than the number of input dimensions, the matrix +defining each local neighborhood is rank-deficient. To address this, standard +LLE applies an arbitrary regularization parameter :math:`r`, which is chosen +relative to the trace of the local weight matrix. Though it can be shown +formally that as :math:`r \to 0`, the solution coverges to the desired +embedding, there is no guarantee that the optimal solution will be found +for :math:`r > 0`. This problem manifests itself in embeddings which distort +the underlying geometry of the manifold. + +One method to address the regularization problem is to use multiple weight +vectors in each neighborhood. This is the essence of *modified locally +linear embedding* (MLLE). MLLE can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'modified'``. +It requires ``n_neighbors > out_dim``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_7.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + Complexity ---------- -For standard LLE, the complete algorithm scales using the -`dense` eigensolver scales as -:math:`O(N log(N)) + O(D N K^3) + O(d N^2)`, where N is the number of samples, -D is the input dimension, d the output dimension and K the number of -neighbors. If the `lobcpg` or `arpack` solver is used, -the last term can be reduced to sub-quadratic in N. +The MLLE algorithm comprises three stages: -Isomap -====== +1. **Nearest Neighbors Search**. Same as standard LLE + +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent + to that of standard LLE. The second term has to do with constructing the + weight matrix from multiple weights. In practice, the added cost of + constructing the MLLE weight matrix is relatively small compared to the + cost of steps 1 and 3. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE + +The overall complexity of MLLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. + + +.. topic:: References: + + * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" + `_ + Zhang, Z. & Wang, J. + + +Hessian Eigenmapping +==================== + +Hessian Eigenmapping (also known as Hessian-based LLE: HLLE) is another method +of solving the regularization problem of LLE. 
It revolves around a +hessian-based quadratic form at each neighborhood which is used to recover +the locally linear structure. Though other implementations note its poor +scaling with data size, ``scikits-learn`` implements some algorithmic +improvements which make its cost comparable to that of other LLE variants +for small output dimension. HLLE can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'hessian'``. +It requires ``n_neighbors > out_dim * (out_dim + 3) / 2``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_8.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + +Complexity +---------- + +The HLLE algorithm comprises three stages: + +1. **Nearest Neighbors Search**. Same as standard LLE + +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar + cost as standard LLE. The second term comes from a QR decomposition + of the local hessian estimator. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE -Another approach to manifold learning is the Isomap algorithm, short for -Isometric Mapping. Isomap seeks a lower-dimensional embedding which -maintains geodesic distances between all points. +The overall complexity of standard HLLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. -Isomap can be performed with the function :func:`scikits.learn.manifold.isomap` -or the object :class:`scikits.learn.manifold.Isomap`. +.. topic:: References: + + * `"Hessian Eigenmaps: Locally linear embedding techniques for + high-dimensional data" `_ + Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) + + +Local Tangent Space Alignment +============================= + +Though not technically a variant of LLE, Local tangent space alignment (LTSA) +is algorithmically similar enough to LLE that it can be put in this category. +Rather than focusing on preserving neighborhood distances as in LLE, LTSA +seeks to characterize the local geometry at each neighborhood via its +tangent space, and performs a global optimization to align these local +tangent spaces to learn the embedding. LTSA can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'ltsa'``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_9.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + Complexity ---------- -The optimized isomap algorithm has a full cost of -:math:`O[N log(N)] + O[N^2 (k + log(N))] + O[d N^2]` where the final -cost can be reduced to sub-quadratic through the use of ``ARPACK``. -The main bottleneck is a shortest-path graph search of all the points. -For this, `scikit-learn` uses either *Dijkstra's algorithm* with -*Fibonacci trees*, or the *Floyd-Warshall algorithm*, depending on the -parameters of the problem. + +The LTSA algorithm comprises three stages: + +1. **Nearest Neighbors Search**. Same as standard LLE + +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar + cost as standard LLE. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE + +The overall complexity of standard LTSA is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`. + + +.. 
topic:: References:
+
+   * `"Principal manifolds and nonlinear dimensionality reduction via
+     tangent space alignment"
+     `_
+     Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004)

 Tips on practical use
 =====================

-* Make sure the same scale is used over all features. Being this a
-  nearest-neighbors method it will behave poorly otherwise.
+* Make sure the same scale is used over all features. Because manifold
+  learning methods are based on a nearest-neighbor search, the algorithm
+  may perform poorly otherwise.
+
+* On certain problems, the `lobpcg` solver available in the LLE variants
+  might converge slowly. Supply a generous value for `max_iter` if big
+  oscillations are detected between runs. ARPACK is generally a more stable
+  iterative solver.
+
+* The reconstruction error computed by each routine can be used to choose
+  the optimal output dimension. For a :math:`d`-dimensional manifold embedded
+  in a :math:`D`-dimensional parameter space, the reconstruction error will
+  decrease as ``out_dim`` is increased until ``out_dim == d``.

-* On certain problems, the `lobcpg` solver might converge slowly. Supply a
-  generous value for `max_iter` if big oscillations are detected between runs.
+* Note that noisy data can "short-circuit" the manifold, in essence acting
+  as a bridge between parts of the manifold that would otherwise be
+  well-separated. Manifold learning on noisy and/or incomplete data is
+  an active area of research.
\ No newline at end of file
diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py
index 803cb84154281..9e1e72f2df4f6 100644
--- a/examples/manifold/plot_compare_methods.py
+++ b/examples/manifold/plot_compare_methods.py
@@ -4,18 +4,7 @@
 =========================================

 An illustration of dimensionality reduction on the S-curve dataset
-with various manifold learning methods. The methods are as follows:
-
-* LLE : Standard Locally Linear Embedding.
-  :func:`scikits.learn.manifold.locally_linear`, ``method = 'standard'``
-* LTSA : Local Tangent Space Alignment.
-  :func:`scikits.learn.manifold.locally_linear`, ``method = 'ltsa'``
-* Hessian LLE : Hessian Eigenmapping.
-  :func:`scikits.learn.manifold.locally_linear`, ``method = 'hessian``
-* Modified LLE : Modified Locally Linear Embedding with multiple weights.
-  :func:`scikits.learn.manifold.locally_linear`, ``method = 'modified'``
-* Isomap : Isometric Mapping.
-  :func:`scikits.learn.manifold.Isomap`
+with various manifold learning methods.
For a discussion and comparison of these algorithms, see the :ref:`manifold module page ` @@ -34,50 +23,49 @@ from scikits.learn import manifold, datasets -X, color = datasets.samples_generator.make_s_curve(1000) +n_points = 1000 +X, color = datasets.samples_generator.make_s_curve(n_points) n_neighbors = 8 out_dim = 2 -methods = ['standard', 'ltsa', 'hessian', 'modified'] -labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] +def scatter_3D(X, title): + fig = pylab.figure() + try: + # compatibility matplotlib < 1.0 + ax = fig.add_axes((0,0,1,1), projection='3d') + ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pylab.cm.Spectral) + ax.view_init(4, -72) + except: + ax = fig.add_subplot(321, projection='3d') + ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pylab.cm.Spectral) + + ax.set_title(title) + +def scatter_2D(X, title): + fig = pylab.figure() + ax = fig.add_subplot(111) + ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=pylab.cm.Spectral) + ax.set_title(title) + ax.xaxis.set_major_formatter(NullFormatter()) + ax.yaxis.set_major_formatter(NullFormatter()) -fig = pylab.figure(figsize=(8, 12)) -pylab.suptitle("Manifold Learning with %i points, %i neighbors" - % (1000, n_neighbors), fontsize=14) -try: - # compatibility matplotlib < 1.0 - ax = fig.add_subplot(321, projection='3d') - ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pylab.cm.Spectral) - ax.view_init(4, -72) -except: - ax = fig.add_subplot(321, projection='3d') - ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pylab.cm.Spectral) +scatter_3D(X, "Input data: %i points in 3 dimensions" % n_points) -ax.set_title('Original Data') +methods = ['standard', 'ltsa', 'hessian', 'modified'] +labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] for i, method in enumerate(methods): t0 = time() Y, err = manifold.locally_linear_embedding( X, n_neighbors, out_dim, eigen_solver='arpack', method=method) t1 = time() - print "%s: %.2g sec" % (methods[i], t1 - t0) - print ' err = %.2e' % err - ax = fig.add_subplot(322 + i) - ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=pylab.cm.Spectral) - ax.set_title("%s (%.2g sec)" % (labels[i], t1 - t0)) - ax.xaxis.set_major_formatter(NullFormatter()) - ax.yaxis.set_major_formatter(NullFormatter()) + scatter_2D(Y, "%s (%.2g sec)" % (labels[i], t1 - t0)) t0 = time() Y = manifold.Isomap(n_neighbors, out_dim).fit_transform(X) t1 = time() -print "Isomap: %.2g sec" % (t1 - t0) -ax = fig.add_subplot(326) -ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=pylab.cm.Spectral) -ax.set_title("Isomap (%.2g sec)" % (t1 - t0)) -ax.xaxis.set_major_formatter(NullFormatter()) -ax.yaxis.set_major_formatter(NullFormatter()) +scatter_2D(Y, "Isomap (%.2g sec)" % (t1 - t0)) pylab.show() diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 3c74705fd73f9..f75198635369c 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -25,6 +25,22 @@ n_samples, n_features = X.shape n_neighbors = 30 + +#---------------------------------------------------------------------- +# Plot images of the digits +N = 20 +img = np.zeros((10 * N, 10 * N)) +for i in range(N): + ix = 10*i + 1 + for j in range(N): + iy = 10 * j + 1 + img[ix:ix + 8, iy:iy + 8] = X[i*N+j].reshape((8,8)) +pl.imshow(img, cmap=pl.cm.binary) +pl.xticks([]) +pl.yticks([]) +pl.title('A selection from the 64-dimensional digits dataset') + + #---------------------------------------------------------------------- # Random 2D projection using a random unitary matrix print "Computing random projection" @@ -63,6 +79,22 @@ 
print "Done. Reconstruction error: %g" % err +#---------------------------------------------------------------------- +# Hessian Locally linear embedding of the digits dataset +print "Computing hessian LLE embedding" +X_hessian, err = manifold.locally_linear_embedding(X, n_neighbors, 2, + method='hessian') +print "Done. Reconstruction error: %g" % err + + +#---------------------------------------------------------------------- +# LTSA embedding of the digits dataset +print "Computing LTSA embedding" +X_ltsa, err = manifold.locally_linear_embedding(X, n_neighbors, 2, + method='ltsa') +print "Done. Reconstruction error: %g" % err + + #---------------------------------------------------------------------- # Isomap projection of the digits dataset print "Computing Isomap embedding" @@ -103,8 +135,10 @@ def plot_embedding(X, title=None): plot_embedding(X_projected, "Random Projection of the digits") plot_embedding(X_pca, "Principal Components projection of the digits") plot_embedding(X_lda, "Linear Discriminant projection of the digits") +plot_embedding(X_iso, "Isomap projection of the digits") plot_embedding(X_lle, "Locally Linear Embedding of the digits") plot_embedding(X_mlle, "Modified Locally Linear Embedding of the digits") -plot_embedding(X_iso, "Isomap projection of the digits") +plot_embedding(X_hessian, "Hessian Eigenmap Embedding of the digits") +plot_embedding(X_ltsa, "Local Tangent Space Alignment of the digits") pl.show() From bb5fe7e51a7cfba5edf4276e48b49bdf52ae3171 Mon Sep 17 00:00:00 2001 From: Jean KOSSAIFI Date: Tue, 9 Aug 2011 10:51:07 +0200 Subject: [PATCH 03/51] Added normalize parameter to LinearModel In fit_intercept, there is now a normalize parameter (default=False) to normalize data. --- scikits/learn/linear_model/base.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index d71701b2c122f..542ee9ba31021 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -47,7 +47,7 @@ def predict(self, X): return safe_sparse_dot(X, self.coef_.T) + self.intercept_ @staticmethod - def _center_data(X, y, fit_intercept): + def _center_data(X, y, fit_intercept, normalize=False): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. @@ -56,20 +56,29 @@ def _center_data(X, y, fit_intercept): if fit_intercept: if scipy.sparse.issparse(X): Xmean = np.zeros(X.shape[1]) + Xstd = np.ones(X.shape[1]) else: Xmean = X.mean(axis=0) X = X - Xmean + if normalize: + Xstd = X.std(axis=0) + Xstd[Xstd==0] = 1 + X = X / Xstd + else: + Xstd = np.ones(X.shape[1]) ymean = y.mean() y = y - ymean else: Xmean = np.zeros(X.shape[1]) + Xstd = np.ones(X.shape[1]) ymean = 0. 
- return X, y, Xmean, ymean + return X, y, Xmean, ymean, Xstd - def _set_intercept(self, Xmean, ymean): + def _set_intercept(self, Xmean, ymean, Xstd): """Set the intercept_ """ if self.fit_intercept: + w = w / Xstd self.intercept_ = ymean - np.dot(Xmean, self.coef_.T) else: self.intercept_ = 0 From a6ee9688485b4a5674313ae9d0d5e9cf45c97f5b Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 12:31:45 +0200 Subject: [PATCH 04/51] Added parameter normalize to LinearRegression --- scikits/learn/linear_model/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 542ee9ba31021..c4a355b537edc 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -103,8 +103,9 @@ class LinearRegression(LinearModel): """ - def __init__(self, fit_intercept=True): + def __init__(self, fit_intercept=True, normalize=False): self.fit_intercept = fit_intercept + self.normalize = False def fit(self, X, y, **params): """ @@ -129,12 +130,12 @@ def fit(self, X, y, **params): X = np.asanyarray(X) y = np.asanyarray(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self ## From 4c43f8e643db5094ca6d925628cbb00ba7f6c1f7 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 13:35:53 +0200 Subject: [PATCH 05/51] LassoLARS now uses the normalize parameter --- scikits/learn/linear_model/least_angle.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 0fd9bb5f2b89c..01b221547d137 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -386,7 +386,7 @@ def fit(self, X, y, overwrite_X=False, **params): X = np.atleast_2d(X) y = np.atleast_1d(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = self._center_data(X, y, self.fit_intercept, self.normalize) alpha = getattr(self, 'alpha', 0.) if hasattr(self, 'n_nonzero_coefs'): alpha = 0. 
# n_nonzero_coefs parametrization takes priority @@ -394,14 +394,6 @@ def fit(self, X, y, overwrite_X=False, **params): else: max_iter = self.max_iter - if self.normalize: - norms = np.sqrt(np.sum(X ** 2, axis=0)) - nonzeros = np.flatnonzero(norms) - if not overwrite_X: - X = X.copy() - overwrite_X = True - X[:, nonzeros] /= norms[nonzeros] - # precompute if n_samples > n_features precompute = self.precompute if hasattr(precompute, '__array__'): @@ -418,11 +410,9 @@ def fit(self, X, y, overwrite_X=False, **params): method=self.method, verbose=self.verbose, max_iter=max_iter, eps=self.eps) - if self.normalize: - self.coef_path_ /= norms[:, np.newaxis] self.coef_ = self.coef_path_[:, -1] - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self From 52a0158345b15ae776bff71d117daea9cc3f3390 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 16:09:55 +0200 Subject: [PATCH 06/51] Completed the integration of the parameter normalize --- scikits/learn/linear_model/base.py | 6 ++++-- scikits/learn/linear_model/least_angle.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index c4a355b537edc..4b26736d6cf97 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -78,7 +78,7 @@ def _set_intercept(self, Xmean, ymean, Xstd): """Set the intercept_ """ if self.fit_intercept: - w = w / Xstd + self.coef_ = self.coef_ / Xstd self.intercept_ = ymean - np.dot(Xmean, self.coef_.T) else: self.intercept_ = 0 @@ -121,6 +121,8 @@ def fit(self, X, y, **params): wether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized Returns ------- @@ -130,7 +132,7 @@ def fit(self, X, y, **params): X = np.asanyarray(X) y = np.asanyarray(y) - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 01b221547d137..b9e07425772b6 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -323,7 +323,7 @@ class Lars(LinearModel): eps: float, optional The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned - systems. Unlike the 'tol' parameter in some iterative + systems. Unlike the 'tol' parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization. From f1a7454767cc7d3e0753f21033a073ef25ba5404 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 16:10:30 +0200 Subject: [PATCH 07/51] Implementation of the parameter normalize in bayes.py --- scikits/learn/linear_model/bayes.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 4a263801c19e3..f195db823b95c 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -64,6 +64,14 @@ class BayesianRidge(LinearModel): (e.g. data is expected to be already centered). Default is True. 
+ normalize : boolean, optional + If True, the regressors X are normalized + Default is False + + verbose : boolean, optional + Verbose mode when fitting the model. Default is False. + + Attributes ---------- `coef_` : array, shape = (n_features) @@ -104,7 +112,7 @@ class BayesianRidge(LinearModel): def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - fit_intercept=True, verbose=False): + fit_intercept=True, normalize=False, verbose=False): self.n_iter = n_iter self.eps = eps self.alpha_1 = alpha_1 @@ -113,6 +121,7 @@ def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.lambda_2 = lambda_2 self.compute_score = compute_score self.fit_intercept = fit_intercept + self.normalize = normalize self.verbose = verbose def fit(self, X, y, **params): @@ -132,7 +141,7 @@ def fit(self, X, y, **params): self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) n_samples, n_features = X.shape ### Initialization of the values of the parameters @@ -206,7 +215,7 @@ def fit(self, X, y, **params): self.lambda_ = lambda_ self.coef_ = coef_ - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self @@ -267,6 +276,9 @@ class ARDRegression(LinearModel): (e.g. data is expected to be already centered). Default is True. + normalize : boolean, optional + If True, the regressors X are normalized + verbose : boolean, optional Verbose mode when fitting the model. Default is False. @@ -313,10 +325,11 @@ class ARDRegression(LinearModel): def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - threshold_lambda=1.e+4, fit_intercept=True, verbose=False): + threshold_lambda=1.e+4, fit_intercept=True, normalize=False, verbose=False): self.n_iter = n_iter self.eps = eps self.fit_intercept = fit_intercept + self.normalize = normalize self.alpha_1 = alpha_1 self.alpha_2 = alpha_2 self.lambda_1 = lambda_1 @@ -351,7 +364,7 @@ def fit(self, X, y, **params): n_samples, n_features = X.shape coef_ = np.zeros(n_features) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) ### Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) @@ -417,5 +430,5 @@ def fit(self, X, y, **params): self.alpha_ = alpha_ self.sigma_ = sigma_ - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self From a3f7cc4cec8afba0b79be3fc41d40d46bcedf992 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 16:22:50 +0200 Subject: [PATCH 08/51] added parameter normalize to coordinate_descent --- .../learn/linear_model/coordinate_descent.py | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index 55af2b422acd1..f77b3621dee43 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -30,13 +30,16 @@ class ElasticNet(LinearModel): rho : float The ElasticNet mixing parameter, with 0 < rho <= 1. For rho = 0 - the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. 
+ the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. For 0 < rho < 1, the penalty is a combination of L1 and L2 fit_intercept: bool Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. + normalize : boolean, optional + If True, the regressors X are normalized + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -68,17 +71,18 @@ class ElasticNet(LinearModel): a*L1 + b*L2 for:: - + alpha = a + b and rho = a/(a+b) """ def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4): + normalize=False, precompute='auto', max_iter=1000, tol=1e-4): self.alpha = alpha self.rho = rho self.coef_ = None self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter self.tol = tol @@ -112,7 +116,8 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): X = np.asanyarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = self._center_data(X, y, self.fit_intercept, + self.normalize) if coef_init is None: self.coef_ = np.zeros(X.shape[1], dtype=np.float64) @@ -146,7 +151,7 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): cd_fast.enet_coordinate_descent_gram(self.coef_, alpha, beta, Gram, Xy, y, self.max_iter, self.tol) - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) if self.dual_gap_ > self.eps_: warnings.warn('Objective did not converge, you might want' @@ -175,6 +180,9 @@ class Lasso(ElasticNet): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -222,7 +230,7 @@ class Lasso(ElasticNet): should be directly passed as a fortran contiguous numpy array. """ - def __init__(self, alpha=1.0, fit_intercept=True, + def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, precompute='auto', max_iter=1000, tol=1e-4): super(Lasso, self).__init__(alpha=alpha, rho=1.0, fit_intercept=fit_intercept, @@ -234,7 +242,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, # Classes to store linear models along a regularization path def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - verbose=False, **fit_params): + normalize=False, verbose=False, **fit_params): """Compute Lasso path with coordinate descent Parameters @@ -260,6 +268,9 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, fit_params : kwargs keyword arguments passed to the Lasso fit method + normalize : boolean, optional + If True, the regressors X are normalized + Returns ------- models : a list of models along the regularization path @@ -272,11 +283,12 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, should be directly passed as a fortran contiguous numpy array. 
""" return enet_path(X, y, rho=1., eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, verbose=verbose, **fit_params) + fit_intercept=fit_intercept, normalize=normalize, + verbose=verbose, **fit_params) def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, verbose=False, **fit_params): + fit_intercept=True, normalize=False, verbose=False, **fit_params): """Compute Elastic-Net path with coordinate descent Parameters @@ -306,6 +318,9 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, fit_params : kwargs keyword arguments passed to the Lasso fit method + normalize : boolean, optional + If True, the regressors X are normalized + Returns ------- models : a list of models along the regularization path @@ -314,7 +329,8 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, ----- See examples/plot_lasso_coordinate_descent_path.py for an example. """ - X, y, Xmean, ymean = LinearModel._center_data(X, y, fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, + fit_intercept, normalize) X = np.asfortranarray(X) # make data contiguous in memory n_samples = X.shape[0] @@ -335,11 +351,12 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, fit_params['Xy'] = np.dot(X.T, y) for alpha in alphas: - model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False) + model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False, + normalize=False) model.fit(X, y, coef_init=coef_, **fit_params) if fit_intercept: model.fit_intercept = True - model._set_intercept(Xmean, ymean) + model._set_intercept(Xmean, ymean, Xstd) if verbose: print model coef_ = model.coef_.copy() @@ -350,13 +367,14 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, class LinearModelCV(LinearModel): """Base class for iterative model fitting along a regularization path""" - def __init__(self, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, precompute='auto', max_iter=1000, + def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, + normalize=False, precompute='auto', max_iter=1000, tol=1e-4, cv=None): self.eps = eps self.n_alphas = n_alphas self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter self.tol = tol @@ -534,7 +552,7 @@ class ElasticNetCV(LinearModelCV): a*L1 + b*L2 for:: - + alpha = a + b and rho = a/(a+b) """ @@ -543,13 +561,14 @@ class ElasticNetCV(LinearModelCV): estimator = ElasticNet def __init__(self, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, precompute='auto', max_iter=1000, - tol=1e-4, cv=None): + fit_intercept=True, normalize=False, precompute='auto', + max_iter=1000, tol=1e-4, cv=None): self.rho = rho self.eps = eps self.n_alphas = n_alphas self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter self.tol = tol From a23365c56b20709a04dd94b5f2d16099416a4baf Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 9 Aug 2011 16:31:18 +0200 Subject: [PATCH 09/51] added parameter normalize to ridge.py --- scikits/learn/linear_model/ridge.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py index d295a412b9c5a..03b75365bcbc5 100644 --- a/scikits/learn/linear_model/ridge.py +++ b/scikits/learn/linear_model/ridge.py @@ -137,6 +137,9 @@ class 
Ridge(LinearModel): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + tol: float Precision of the solution. @@ -159,9 +162,10 @@ class Ridge(LinearModel): Ridge(alpha=1.0, tol=0.001, fit_intercept=True) """ - def __init__(self, alpha=1.0, fit_intercept=True, tol=1e-3): + def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, tol=1e-3): self.alpha = alpha self.fit_intercept = fit_intercept + self.normalize = normalize self.tol = tol def fit(self, X, y, sample_weight=1.0, solver='auto', **params): @@ -196,12 +200,12 @@ def fit(self, X, y, sample_weight=1.0, solver='auto', **params): X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) - X, y, X_mean, y_mean = \ - LinearModel._center_data(X, y, self.fit_intercept) + X, y, X_mean, y_mean, Xstd = \ + LinearModel._center_data(X, y, self.fit_intercept, self.normalize) self.coef_ = ridge_regression(X, y, self.alpha, sample_weight, solver, self.tol) - self._set_intercept(X_mean, y_mean) + self._set_intercept(X_mean, y_mean, Xstd) return self @@ -221,6 +225,9 @@ class RidgeClassifier(Ridge): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + Attributes ---------- @@ -322,9 +329,10 @@ class _RidgeGCV(LinearModel): """ def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, - score_func=None, loss_func=None): + normalize=False, score_func=None, loss_func=None): self.alphas = np.asanyarray(alphas) self.fit_intercept = fit_intercept + self.normalize = normalize self.score_func = score_func self.loss_func = loss_func @@ -385,7 +393,8 @@ def fit(self, X, y, sample_weight=1.0): n_samples = X.shape[0] - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, + self.fit_intercept, self.normalize) K, v, Q = self._pre_compute(X, y) n_y = 1 if len(y.shape) == 1 else y.shape[1] @@ -413,7 +422,7 @@ def fit(self, X, y, sample_weight=1.0): self.dual_coef_ = C[best] self.coef_ = safe_sparse_dot(self.dual_coef_.T, X) - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self @@ -440,6 +449,9 @@ class RidgeCV(LinearModel): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). 
+ normalize : boolean, optional + If True, the regressors X are normalized + loss_func: callable, optional function that takes 2 arguments and compares them in order to evaluate the performance of prediciton (small is good) @@ -456,9 +468,10 @@ class RidgeCV(LinearModel): """ def __init__(self, alphas=np.array([0.1, 1.0, 10.0]), fit_intercept=True, - score_func=None, loss_func=None, cv=None): + normalize=False, score_func=None, loss_func=None, cv=None): self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.score_func = score_func self.loss_func = loss_func self.cv = cv From 295dc3c982f580778a082c3602509a62938d5042 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 10 Aug 2011 10:32:48 +0200 Subject: [PATCH 10/51] Added parameter normalize to omp.py Some changes in Linear_Model --- scikits/learn/linear_model/base.py | 46 ++++++++++++++++-------------- scikits/learn/linear_model/omp.py | 17 ++++------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 4b26736d6cf97..9903315e7ae3d 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -12,6 +12,7 @@ # License: BSD Style. import numpy as np +import scipy.sparse from ..base import BaseEstimator, RegressorMixin, ClassifierMixin from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber @@ -47,39 +48,41 @@ def predict(self, X): return safe_sparse_dot(X, self.coef_.T) + self.intercept_ @staticmethod - def _center_data(X, y, fit_intercept, normalize=False): + def _center_data(X, y, fit_intercept, normalize=False, copy=False): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. """ - import scipy.sparse # importing scipy.sparse just for this is overkill if fit_intercept: if scipy.sparse.issparse(X): - Xmean = np.zeros(X.shape[1]) - Xstd = np.ones(X.shape[1]) + X_mean = np.zeros(X.shape[1]) + X_std = np.ones(X.shape[1]) else: - Xmean = X.mean(axis=0) - X = X - Xmean + if copy: + X = X.copy() + + X_mean = X.mean(axis=0) + X = X - X_mean if normalize: - Xstd = X.std(axis=0) - Xstd[Xstd==0] = 1 - X = X / Xstd + X_std = np.sqrt(np.sum(X ** 2, axis=0)) + X_std[X_std==0] = 1 + X = X / X_std else: - Xstd = np.ones(X.shape[1]) - ymean = y.mean() - y = y - ymean + X_std = np.ones(X.shape[1]) + y_mean = y.mean() + y = y - y_mean else: - Xmean = np.zeros(X.shape[1]) - Xstd = np.ones(X.shape[1]) - ymean = 0. - return X, y, Xmean, ymean, Xstd + X_mean = np.zeros(X.shape[1]) + X_std = np.ones(X.shape[1]) + y_mean = 0. 
+ return X, y, X_mean, y_mean, X_std - def _set_intercept(self, Xmean, ymean, Xstd): + def _set_intercept(self, X_mean, y_mean, X_std): """Set the intercept_ """ if self.fit_intercept: - self.coef_ = self.coef_ / Xstd - self.intercept_ = ymean - np.dot(Xmean, self.coef_.T) + self.coef_ = self.coef_ / X_std + self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T) else: self.intercept_ = 0 @@ -132,12 +135,13 @@ def fit(self, X, y, **params): X = np.asanyarray(X) y = np.asanyarray(y) - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, + self.fit_intercept, self.normalize) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self ## diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index 04a7a42c5ebe1..3127fcaac4ed3 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -497,11 +497,8 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, X = np.atleast_2d(X) y = np.atleast_1d(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) - if self.normalize: - norms = np.sqrt(np.sum(X ** 2, axis=0)) - nonzeros = np.flatnonzero(norms) - X[:, nonzeros] /= norms[nonzeros] + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, + self.normalize, not(overwrite_x)) if Gram is not None: Gram = np.atleast_2d(Gram) @@ -520,11 +517,11 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, if not overwrite_xy: Xy = Xy.copy() if self.normalize: - Xy /= norms + Xy /= X_std if self.normalize: - Gram /= norms - Gram /= norms[:, np.newaxis] + Gram /= X_std + Gram /= X_std[:, np.newaxis] norms_sq = np.sum((y ** 2), axis=0) if eps is not None else None self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs, @@ -538,7 +535,5 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, precompute_gram=precompute_gram, overwrite_x=overwrite_x).T - if self.normalize: - self.coef_ /= norms - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self From e274935ba70f4ba6bd0918484f4116ce9e8ed703 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 10 Aug 2011 14:53:43 +0200 Subject: [PATCH 11/51] Added parameter normalize Added parameter overwrite_X --- scikits/learn/linear_model/base.py | 13 +-- scikits/learn/linear_model/bayes.py | 47 +++++++---- .../learn/linear_model/coordinate_descent.py | 54 ++++++++---- scikits/learn/linear_model/least_angle.py | 27 ++++-- scikits/learn/linear_model/omp.py | 84 ++++++++++--------- scikits/learn/linear_model/ridge.py | 22 +++-- .../linear_model/sparse/coordinate_descent.py | 8 +- 7 files changed, 159 insertions(+), 96 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 9903315e7ae3d..7cf8cefeee845 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -48,7 +48,7 @@ def predict(self, X): return safe_sparse_dot(X, self.coef_.T) + self.intercept_ @staticmethod - def _center_data(X, y, fit_intercept, normalize=False, copy=False): + def _center_data(X, y, fit_intercept, normalize=False, overwrite_X=False): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. 
@@ -58,7 +58,7 @@ def _center_data(X, y, fit_intercept, normalize=False, copy=False): X_mean = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) else: - if copy: + if not overwrite_X: X = X.copy() X_mean = X.mean(axis=0) @@ -106,9 +106,10 @@ class LinearRegression(LinearModel): """ - def __init__(self, fit_intercept=True, normalize=False): + def __init__(self, fit_intercept=True, normalize=False, overwrite_X=False): self.fit_intercept = fit_intercept - self.normalize = False + self.normalize = normalize + self.overwrite_X = overwrite_X def fit(self, X, y, **params): """ @@ -135,8 +136,8 @@ def fit(self, X, y, **params): X = np.asanyarray(X) y = np.asanyarray(y) - X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, - self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, self.normalize, self.overwrite_X) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index f195db823b95c..6c56ebdce4ee7 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -65,11 +65,15 @@ class BayesianRidge(LinearModel): Default is True. normalize : boolean, optional - If True, the regressors X are normalized - Default is False + If True, the regressors X are normalized + Default is False + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False verbose : boolean, optional - Verbose mode when fitting the model. Default is False. + Verbose mode when fitting the model. Default is False. Attributes @@ -99,9 +103,9 @@ class BayesianRidge(LinearModel): >>> from scikits.learn import linear_model >>> clf = linear_model.BayesianRidge() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06, - compute_score=False) + BayesianRidge(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, + lambda_2=1e-06, fit_intercept=True, eps=0.001, alpha_2=1e-06, + alpha_1=1e-06, overwrite_X=False, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -112,7 +116,8 @@ class BayesianRidge(LinearModel): def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - fit_intercept=True, normalize=False, verbose=False): + fit_intercept=True, normalize=False, + overwrite_X=False, verbose=False): self.n_iter = n_iter self.eps = eps self.alpha_1 = alpha_1 @@ -122,6 +127,7 @@ def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.compute_score = compute_score self.fit_intercept = fit_intercept self.normalize = normalize + self.overwrite_X = overwrite_X self.verbose = verbose def fit(self, X, y, **params): @@ -141,7 +147,8 @@ def fit(self, X, y, **params): self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, self.normalize, self.overwrite_X) n_samples, n_features = X.shape ### Initialization of the values of the parameters @@ -215,7 +222,7 @@ def fit(self, X, y, **params): self.lambda_ = lambda_ self.coef_ = coef_ - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -277,7 +284,11 @@ class 
ARDRegression(LinearModel): Default is True. normalize : boolean, optional - If True, the regressors X are normalized + If True, the regressors X are normalized + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False verbose : boolean, optional Verbose mode when fitting the model. Default is False. @@ -312,9 +323,10 @@ class ARDRegression(LinearModel): >>> from scikits.learn import linear_model >>> clf = linear_model.ARDRegression() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - ARDRegression(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, threshold_lambda=10000.0, - alpha_2=1e-06, alpha_1=1e-06, compute_score=False) + RDRegression(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, + lambda_2=1e-06, fit_intercept=True, eps=0.001, + threshold_lambda=10000.0, alpha_2=1e-06, alpha_1=1e-06, + overwrite_X=False, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -325,7 +337,8 @@ class ARDRegression(LinearModel): def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - threshold_lambda=1.e+4, fit_intercept=True, normalize=False, verbose=False): + threshold_lambda=1.e+4, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False): self.n_iter = n_iter self.eps = eps self.fit_intercept = fit_intercept @@ -336,6 +349,7 @@ def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.lambda_2 = lambda_2 self.compute_score = compute_score self.threshold_lambda = threshold_lambda + self.overwrite_X = overwrite_X self.verbose = verbose def fit(self, X, y, **params): @@ -364,7 +378,8 @@ def fit(self, X, y, **params): n_samples, n_features = X.shape coef_ = np.zeros(n_features) - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, + self.normalize, self.overwrite_X) ### Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) @@ -430,5 +445,5 @@ def fit(self, X, y, **params): self.alpha_ = alpha_ self.sigma_ = sigma_ - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index f77b3621dee43..a5d083af1f0a0 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -48,6 +48,10 @@ class ElasticNet(LinearModel): max_iter: int, optional The maximum number of iterations + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + tol: float, optional The tolerance for the optimization: if the updates are smaller than 'tol', the optimization code checks the @@ -77,7 +81,8 @@ class ElasticNet(LinearModel): """ def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, - normalize=False, precompute='auto', max_iter=1000, tol=1e-4): + normalize=False, precompute='auto', max_iter=1000, + overwrite_X=False, tol=1e-4): self.alpha = alpha self.rho = rho self.coef_ = None @@ -85,6 +90,7 @@ def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, self.normalize = normalize self.precompute = precompute self.max_iter = max_iter + self.overwrite_X = overwrite_X self.tol = tol def fit(self, X, y, Xy=None, coef_init=None, **params): @@ -117,7 +123,7 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): y = np.asanyarray(y, dtype=np.float64) X, y, 
Xmean, ymean, Xstd = self._center_data(X, y, self.fit_intercept, - self.normalize) + self.normalize, self.overwrite_X) if coef_init is None: self.coef_ = np.zeros(X.shape[1], dtype=np.float64) @@ -183,6 +189,10 @@ class Lasso(ElasticNet): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -211,8 +221,8 @@ class Lasso(ElasticNet): >>> from scikits.learn import linear_model >>> clf = linear_model.Lasso(alpha=0.1) >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - Lasso(precompute='auto', alpha=0.1, max_iter=1000, tol=0.0001, - fit_intercept=True) + Lasso(normalize=False, fit_intercept=True, max_iter=1000, precompute='auto', + tol=0.0001, alpha=0.1, overwrite_X=False) >>> print clf.coef_ [ 0.85 0. ] >>> print clf.intercept_ @@ -231,18 +241,17 @@ class Lasso(ElasticNet): """ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, - precompute='auto', max_iter=1000, tol=1e-4): + precompute='auto', overwrite_X=False, max_iter=1000, tol=1e-4): super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept, - precompute=precompute, max_iter=max_iter, - tol=tol) + fit_intercept=fit_intercept, precompute=precompute, + max_iter=max_iter, overwrite_X=overwrite_X, tol=tol) ############################################################################### # Classes to store linear models along a regularization path def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - normalize=False, verbose=False, **fit_params): + normalize=False, overwrite_X=False, verbose=False, **fit_params): """Compute Lasso path with coordinate descent Parameters @@ -271,6 +280,10 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + Returns ------- models : a list of models along the regularization path @@ -284,11 +297,12 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, """ return enet_path(X, y, rho=1., eps=eps, n_alphas=n_alphas, alphas=alphas, fit_intercept=fit_intercept, normalize=normalize, - verbose=verbose, **fit_params) + verbose=verbose, overwrite_X=overwrite_X, **fit_params) def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, normalize=False, verbose=False, **fit_params): + fit_intercept=True, normalize=False, verbose=False, + overwrite_X=False, **fit_params): """Compute Elastic-Net path with coordinate descent Parameters @@ -321,6 +335,10 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + Returns ------- models : a list of models along the regularization path @@ -330,7 +348,7 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, See examples/plot_lasso_coordinate_descent_path.py for an example. 
""" X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, - fit_intercept, normalize) + fit_intercept, normalize, overwrite_X) X = np.asfortranarray(X) # make data contiguous in memory n_samples = X.shape[0] @@ -368,8 +386,8 @@ class LinearModelCV(LinearModel): """Base class for iterative model fitting along a regularization path""" def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - normalize=False, precompute='auto', max_iter=1000, - tol=1e-4, cv=None): + normalize=False, precompute='auto', max_iter=1000, tol=1e-4, + overwrite_X=False, cv=None): self.eps = eps self.n_alphas = n_alphas self.alphas = alphas @@ -378,6 +396,7 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, self.precompute = precompute self.max_iter = max_iter self.tol = tol + self.overwrite_X = overwrite_X self.cv = cv def fit(self, X, y, **fit_params): @@ -472,7 +491,7 @@ class LassoCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). - Specific crossvalidation objects can be passed, see + Specific crossvalidation objects can be passed, see scikits.learn.cross_val module for the list of possible objects Notes @@ -498,7 +517,7 @@ class ElasticNetCV(LinearModelCV): rho : float, optional float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For rho = 0 - the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. + the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. For 0 < rho < 1, the penalty is a combination of L1 and L2 eps : float, optional @@ -562,7 +581,7 @@ class ElasticNetCV(LinearModelCV): def __init__(self, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, normalize=False, precompute='auto', - max_iter=1000, tol=1e-4, cv=None): + max_iter=1000, tol=1e-4, cv=None, overwrite_X=False): self.rho = rho self.eps = eps self.n_alphas = n_alphas @@ -573,3 +592,4 @@ def __init__(self, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, self.max_iter = max_iter self.tol = tol self.cv = cv + self.overwrite_X = overwrite_X diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index b9e07425772b6..e729460938416 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -319,6 +319,9 @@ class Lars(LinearModel): calculations. If set to 'auto' let us decide. The Gram matrix can also be passed as argument. + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False eps: float, optional The machine-precision regularization in the computation of the @@ -356,7 +359,7 @@ class Lars(LinearModel): """ def __init__(self, fit_intercept=True, verbose=False, normalize=True, precompute='auto', n_nonzero_coefs=500, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.fit_intercept = fit_intercept self.verbose = verbose self.normalize = normalize @@ -364,8 +367,9 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True, self.precompute = precompute self.n_nonzero_coefs = n_nonzero_coefs self.eps = eps + self.overwrite_X = overwrite_X - def fit(self, X, y, overwrite_X=False, **params): + def fit(self, X, y, **params): """Fit the model using X, y as training data. 
parameters @@ -386,7 +390,8 @@ def fit(self, X, y, overwrite_X=False, **params): X = np.atleast_2d(X) y = np.atleast_1d(y) - X, y, Xmean, ymean, Xstd = self._center_data(X, y, self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, + self.normalize, self.overwrite_X) alpha = getattr(self, 'alpha', 0.) if hasattr(self, 'n_nonzero_coefs'): alpha = 0. # n_nonzero_coefs parametrization takes priority @@ -405,14 +410,14 @@ def fit(self, X, y, overwrite_X=False, **params): Gram = None self.alphas_, self.active_, self.coef_path_ = lars_path(X, y, - Gram=Gram, overwrite_X=overwrite_X, + Gram=Gram, overwrite_X=self.overwrite_X, overwrite_Gram=True, alpha_min=alpha, method=self.method, verbose=self.verbose, max_iter=max_iter, eps=self.eps) self.coef_ = self.coef_path_[:, -1] - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -436,6 +441,10 @@ class LassoLars(Lars): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -465,8 +474,9 @@ class LassoLars(Lars): >>> from scikits.learn import linear_model >>> clf = linear_model.LassoLars(alpha=0.01) >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS - LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, - eps=..., precompute='auto', alpha=0.01) +LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, + eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, + overwrite_X=False) >>> print clf.coef_ [ 0. -0.96325765] @@ -481,7 +491,7 @@ class LassoLars(Lars): def __init__(self, alpha=1.0, fit_intercept=True, verbose=False, normalize=True, precompute='auto', max_iter=500, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.alpha = alpha self.fit_intercept = fit_intercept self.max_iter = max_iter @@ -489,6 +499,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False, self.normalize = normalize self.method = 'lasso' self.precompute = precompute + self.overwrite_X = overwrite_X self.eps = eps diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index 3127fcaac4ed3..a448a8157c5cb 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -19,7 +19,7 @@ """ -def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): +def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_X=False): """ Solves a single Orthogonal Matching Pursuit problem using the Cholesky decomposition. @@ -38,7 +38,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): eps: float Targeted squared error, if not None overrides n_nonzero_coefs. - overwrite_x: bool, + overwrite_X: bool, Whether the design matrix X can be overwritten by the algorithm. This is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. 
@@ -53,7 +53,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): vector """ - if not overwrite_x: + if not overwrite_X: X = X.copy('F') else: # even if we are allowed to overwrite, still copy it if bad order X = np.asfortranarray(X) @@ -104,7 +104,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, - overwrite_gram=False, overwrite_xy=False): + overwrite_gram=False, overwrite_Xy=False): """ Solves a single Orthogonal Matching Pursuit problem using the Cholesky decomposition, based on the Gram matrix and more @@ -132,7 +132,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. - overwrite_xy: bool, + overwrite_Xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. Returns: @@ -150,7 +150,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, else: Gram = np.asfortranarray(Gram) - if not overwrite_xy: + if not overwrite_Xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps @@ -205,7 +205,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, - overwrite_x=False): + overwrite_X=False): """Orthogonal Matching Pursuit (OMP) Solves n_targets Orthogonal Matching Pursuit problems. @@ -236,7 +236,7 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, Whether to perform precomputations. Improves performance when n_targets or n_samples is very large. - overwrite_x: bool, + overwrite_X: bool, Whether the design matrix X can be overwritten by the algorithm. This is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. @@ -268,13 +268,13 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, X, y = map(np.asanyarray, (X, y)) if y.ndim == 1: y = y[:, np.newaxis] - if not overwrite_x: + if not overwrite_X: X = X.copy('F') - overwrite_x = True + overwrite_X = True else: X = np.asfortranarray(X) if y.shape[1] > 1: # subsequent targets will be affected - overwrite_x = False + overwrite_X = False if n_nonzero_coefs == None and eps == None: n_nonzero_coefs = int(0.1 * X.shape[1]) if eps is not None and eps < 0: @@ -295,20 +295,20 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, else: norms_squared = None return orthogonal_mp_gram(G, Xy, n_nonzero_coefs, eps, norms_squared, - overwrite_gram=overwrite_x, - overwrite_xy=True) + overwrite_gram=overwrite_X, + overwrite_Xy=True) coef = np.zeros((X.shape[1], y.shape[1])) for k in xrange(y.shape[1]): x, idx = _cholesky_omp(X, y[:, k], n_nonzero_coefs, eps, - overwrite_x=overwrite_x) + overwrite_X=overwrite_X) coef[idx, k] = x return np.squeeze(coef) def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, norms_squared=None, overwrite_gram=False, - overwrite_xy=False): + overwrite_Xy=False): """Gram Orthogonal Matching Pursuit (OMP) Solves n_targets Orthogonal Matching Pursuit problems using only @@ -336,7 +336,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. - overwrite_xy: bool, + overwrite_Xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. 
Returns @@ -386,7 +386,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, x, idx = _gram_omp(Gram, Xy[:, k], n_nonzero_coefs, norms_squared[k] if eps is not None else None, eps, overwrite_gram=overwrite_gram, - overwrite_xy=overwrite_xy) + overwrite_Xy=overwrite_Xy) coef[idx, k] = x return np.squeeze(coef) @@ -416,6 +416,21 @@ class OrthogonalMatchingPursuit(LinearModel): very large. Note that if you already have such matrices, you can pass them directly to the fit method. + overwrite_X: bool, + Whether the design matrix X can be overwritten by the algorithm. + This is only helpful if X is already Fortran-ordered, otherwise a + copy is made anyway. + + overwrite_gram: bool, + Whether the gram matrix can be overwritten by the algorithm. This + is only helpful if it is already Fortran-ordered, otherwise a copy + is made anyway. + + overwrite_Xy: bool, + Whether the covariance vector Xy can be overwritten by the + algorithm. + + Attributes ---------- coef_: array, shape = (n_features,) or (n_features, n_targets) @@ -446,16 +461,20 @@ class OrthogonalMatchingPursuit(LinearModel): """ - def __init__(self, n_nonzero_coefs=None, eps=None, fit_intercept=True, - normalize=True, precompute_gram=False): + def __init__(self, overwrite_X=False, overwrite_gram=False, + overwrite_Xy=False, n_nonzero_coefs=None, eps=None, + fit_intercept=True, normalize=True, precompute_gram=False): self.n_nonzero_coefs = n_nonzero_coefs self.eps = eps self.fit_intercept = fit_intercept self.normalize = normalize self.precompute_gram = precompute_gram + self.overwrite_gram = overwrite_gram + self.overwrite_Xy = overwrite_Xy + self.overwrite_X = overwrite_X + self.eps = eps - def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, - overwrite_gram=False, overwrite_xy=False, **params): + def fit(self, X, y, Gram=None, Xy=None, **params): """Fit the model using X, y as training data. Parameters @@ -473,19 +492,6 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, (optional) Input targets multiplied by X: X.T * y - overwrite_x: bool, - Whether the design matrix X can be overwritten by the algorithm. - This is only helpful if X is already Fortran-ordered, otherwise a - copy is made anyway. - - overwrite_gram: bool, - Whether the gram matrix can be overwritten by the algorithm. This - is only helpful if it is already Fortran-ordered, otherwise a copy - is made anyway. - - overwrite_xy: bool, - Whether the covariance vector Xy can be overwritten by the - algorithm. 
Returns ------- @@ -498,11 +504,11 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, y = np.atleast_1d(y) X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize, not(overwrite_x)) + self.normalize, self.overwrite_X) if Gram is not None: Gram = np.atleast_2d(Gram) - if not overwrite_gram: + if not self.overwrite_gram: overwrite_gram = True Gram = Gram.copy('F') else: @@ -514,7 +520,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, if Xy is None: Xy = np.dot(X.T, y) else: - if not overwrite_xy: + if not self.overwrite_Xy: Xy = Xy.copy() if self.normalize: Xy /= X_std @@ -523,7 +529,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, Gram /= X_std Gram /= X_std[:, np.newaxis] - norms_sq = np.sum((y ** 2), axis=0) if eps is not None else None + norms_sq = np.sum((y ** 2), axis=0) if self.eps is not None else None self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs, self.eps, norms_sq, overwrite_gram, True).T @@ -533,7 +539,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, precompute_gram = X.shape[0] > X.shape[1] self.coef_ = orthogonal_mp(X, y, self.n_nonzero_coefs, self.eps, precompute_gram=precompute_gram, - overwrite_x=overwrite_x).T + overwrite_X=self.overwrite_X).T self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py index 03b75365bcbc5..4b4c30ec5ef6f 100644 --- a/scikits/learn/linear_model/ridge.py +++ b/scikits/learn/linear_model/ridge.py @@ -140,6 +140,10 @@ class Ridge(LinearModel): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + tol: float Precision of the solution. 
@@ -159,13 +163,16 @@ class Ridge(LinearModel): >>> X = np.random.randn(n_samples, n_features) >>> clf = Ridge(alpha=1.0) >>> clf.fit(X, y) - Ridge(alpha=1.0, tol=0.001, fit_intercept=True) + Ridge(normalize=False, alpha=1.0, overwrite_X=False, tol=0.001, + fit_intercept=True) """ - def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, tol=1e-3): + def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, + overwrite_X=False, tol=1e-3): self.alpha = alpha self.fit_intercept = fit_intercept self.normalize = normalize + self.overwrite_X = overwrite_X self.tol = tol def fit(self, X, y, sample_weight=1.0, solver='auto', **params): @@ -200,12 +207,13 @@ def fit(self, X, y, sample_weight=1.0, solver='auto', **params): X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) - X, y, X_mean, y_mean, Xstd = \ - LinearModel._center_data(X, y, self.fit_intercept, self.normalize) + X, y, X_mean, y_mean, X_std = \ + self._center_data(X, y, self.fit_intercept, + self.normalize, self.overwrite_X) self.coef_ = ridge_regression(X, y, self.alpha, sample_weight, solver, self.tol) - self._set_intercept(X_mean, y_mean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -393,7 +401,7 @@ def fit(self, X, y, sample_weight=1.0): n_samples = X.shape[0] - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) K, v, Q = self._pre_compute(X, y) @@ -422,7 +430,7 @@ def fit(self, X, y, sample_weight=1.0): self.dual_coef_ = C[best] self.coef_ = safe_sparse_dot(self.dual_coef_.T, X) - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/scikits/learn/linear_model/sparse/coordinate_descent.py b/scikits/learn/linear_model/sparse/coordinate_descent.py index 38ef4245f6be7..0d99f1c6ad84b 100644 --- a/scikits/learn/linear_model/sparse/coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/coordinate_descent.py @@ -41,12 +41,13 @@ class ElasticNet(LinearModel): while alpha corresponds to the lambda parameter in glmnet. 
""" - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False): + def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False, normalize=False): if fit_intercept: raise NotImplementedError("fit_intercept=True is not implemented") self.alpha = alpha self.rho = rho self.fit_intercept = fit_intercept + self.normalize = normalize self.intercept_ = 0.0 self._set_coef(None) @@ -133,6 +134,7 @@ class Lasso(ElasticNet): """ - def __init__(self, alpha=1.0, fit_intercept=False): + def __init__(self, alpha=1.0, fit_intercept=False, normalize=False): super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, + normalize=normalize) From dd9bd4c44fa7b8ac9563fb63d2e3c5d92b39c346 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 10 Aug 2011 16:19:59 +0200 Subject: [PATCH 12/51] Fixed some errors (mainly docstrings) --- scikits/learn/linear_model/bayes.py | 12 ++++++------ scikits/learn/linear_model/least_angle.py | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 6c56ebdce4ee7..340c0908acc26 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -104,8 +104,8 @@ class BayesianRidge(LinearModel): >>> clf = linear_model.BayesianRidge() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) BayesianRidge(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, - lambda_2=1e-06, fit_intercept=True, eps=0.001, alpha_2=1e-06, - alpha_1=1e-06, overwrite_X=False, compute_score=False) + lambda_2=1e-06, fit_intercept=True, eps=0.001, alpha_2=1e-06, + alpha_1=1e-06, overwrite_X=False, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -323,10 +323,10 @@ class ARDRegression(LinearModel): >>> from scikits.learn import linear_model >>> clf = linear_model.ARDRegression() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - RDRegression(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, - lambda_2=1e-06, fit_intercept=True, eps=0.001, - threshold_lambda=10000.0, alpha_2=1e-06, alpha_1=1e-06, - overwrite_X=False, compute_score=False) + ARDRegression(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, + lambda_2=1e-06, fit_intercept=True, eps=0.001, + threshold_lambda=10000.0, alpha_2=1e-06, alpha_1=1e-06, + overwrite_X=False, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index e729460938416..9d567fe4fc253 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -345,7 +345,7 @@ class Lars(LinearModel): >>> clf = linear_model.Lars(n_nonzero_coefs=1) >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, - eps=..., precompute='auto') + eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) >>> print clf.coef_ [ 0. -1.] 
@@ -474,9 +474,9 @@ class LassoLars(Lars): >>> from scikits.learn import linear_model >>> clf = linear_model.LassoLars(alpha=0.01) >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS -LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, - eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, - overwrite_X=False) + LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, + eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, + overwrite_X=False) >>> print clf.coef_ [ 0. -0.96325765] @@ -622,6 +622,10 @@ class LarsCV(LARS): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -664,12 +668,13 @@ class LarsCV(LARS): def __init__(self, fit_intercept=True, verbose=False, max_iter=500, normalize=True, precompute='auto', cv=None, n_jobs=1, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.fit_intercept = fit_intercept self.max_iter = max_iter self.verbose = verbose self.normalize = normalize self.precompute = precompute + self.overwrite_X = overwrite_X self.cv = cv self.n_jobs = n_jobs self.eps = eps @@ -776,6 +781,10 @@ class LassoLarsCV(LarsCV): Cholesky diagonal factors. Increase this for very ill-conditioned systems. + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + Attributes ---------- From 3a56be4504f937a4c82da465ec41f93c20612863 Mon Sep 17 00:00:00 2001 From: Jake Vanderplas Date: Wed, 10 Aug 2011 09:51:40 -0700 Subject: [PATCH 13/51] changes to manifold doc --- doc/modules/manifold.rst | 29 +++++++++++++++++++++++++---- doc/modules/preprocessing.rst | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 6b145ebdf9db0..0a12ca5ed7e25 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -140,6 +140,10 @@ The Isomap algorithm comprises three stages: The overall complexity of Isomap is :math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension .. topic:: References: @@ -182,6 +186,10 @@ The standard LLE algorithm comprises three stages: The overall complexity of standard LLE is :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension .. topic:: References: @@ -234,6 +242,10 @@ The MLLE algorithm comprises three stages: The overall complexity of MLLE is :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension .. topic:: References: @@ -270,14 +282,18 @@ The HLLE algorithm comprises three stages: 2. **Weight Matrix Construction**. Approximately :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar - cost as standard LLE. The second term comes from a QR decomposition - of the local hessian estimator. + cost to that of standard LLE. The second term comes from a QR + decomposition of the local hessian estimator. 3. 
**Partial Eigenvalue Decomposition**. Same as standard LLE The overall complexity of standard HLLE is :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension .. topic:: References: @@ -312,13 +328,17 @@ The LTSA algorithm comprises three stages: 2. **Weight Matrix Construction**. Approximately :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar - cost as standard LLE. + cost to that of standard LLE. 3. **Partial Eigenvalue Decomposition**. Same as standard LLE The overall complexity of standard LTSA is :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`. +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension .. topic:: References: @@ -333,7 +353,8 @@ Tips on practical use * Make sure the same scale is used over all features. Because manifold learning methods are based on a nearest-neighbor search, the algorithm - may perform poorly otherwise. + may perform poorly otherwise. See :ref:`Scaler ` + for convenient ways of scaling heterogeneous data. * On certain problems, the `lobcpg` solver available in the LLE variants might converge slowly. Supply a generous value for `max_iter` if big diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 8079bf6f21040..443a276acc817 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -10,6 +10,7 @@ The ``scikits.learn.preprocessing`` package provides several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. +.. _preprocessing_scaler: Standardization or Mean Removal and Variance Scaling ==================================================== From 2f9f318733d903eb8eda913b9d7b399598a9d0c4 Mon Sep 17 00:00:00 2001 From: Gilles Louppe Date: Thu, 11 Aug 2011 10:13:34 +0200 Subject: [PATCH 14/51] Fixes of minor bugs + improved test coverage (now 100%) --- scikits/learn/datasets/samples_generator.py | 7 ++++--- .../datasets/tests/test_samples_generator.py | 15 ++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/scikits/learn/datasets/samples_generator.py b/scikits/learn/datasets/samples_generator.py index cdd4ea6693085..8539543ebb009 100644 --- a/scikits/learn/datasets/samples_generator.py +++ b/scikits/learn/datasets/samples_generator.py @@ -55,7 +55,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, weights : list of floats or None (default=None) The proportions of samples assigned to each class. If None, then - classes are balanced. + classes are balanced. Note that if `len(weights) == n_classes - 1`, + then the last class weight is automatically inferred. flip_y : float, optional (default=0.01) The fraction of samples whose class are randomly exchanged. 
@@ -306,7 +307,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, bias=0.0, n_features=n_features, effective_rank=effective_rank, tail_strength=tail_strength, - seed=generator) + random_state=generator) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored @@ -352,7 +353,7 @@ def make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1.0, n_features : int, optional (default=2) The number of features for each sample. - centers : int or array of shape [n_centers, n_features], optinal (default=3) + centers : int or array of shape [n_centers, n_features], optional (default=3) The number of centers to generate, or the fixed center locations. cluster_std: float or sequence of floats, optional (default=1.0) diff --git a/scikits/learn/datasets/tests/test_samples_generator.py b/scikits/learn/datasets/tests/test_samples_generator.py index a083a17b2fd7c..2a8de347278ac 100644 --- a/scikits/learn/datasets/tests/test_samples_generator.py +++ b/scikits/learn/datasets/tests/test_samples_generator.py @@ -18,8 +18,10 @@ def test_make_classification(): X, y = make_classification(n_samples=100, n_features=20, n_informative=5, - n_classes=3, n_clusters_per_class=1, - weights=[0.1, 0.25, 0.65], random_state=0) + n_redundant=1, n_repeated=1, n_classes=3, + n_clusters_per_class=1, hypercube=False, + shift=None, scale=None, weights=[0.1, 0.25], + random_state=0) assert_equal(X.shape, (100, 20), "X shape mismatch") assert_equal(y.shape, (100,), "y shape mismatch") @@ -31,7 +33,8 @@ def test_make_classification(): def test_make_regression(): X, y, c = make_regression(n_samples=50, n_features=10, n_informative=3, - coef=True, bias=0.0, random_state=0) + effective_rank=5, coef=True, bias=0.0, + noise=1e-10, random_state=0) assert_equal(X.shape, (50, 10), "X shape mismatch") assert_equal(y.shape, (50,), "y shape mismatch") @@ -41,9 +44,11 @@ def test_make_regression(): def test_make_blobs(): - X, y = make_blobs(n_samples=50, n_features=5, centers=3, random_state=0) + X, y = make_blobs(n_samples=50, n_features=2, + centers=[[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]], + random_state=0) - assert_equal(X.shape, (50, 5), "X shape mismatch") + assert_equal(X.shape, (50, 2), "X shape mismatch") assert_equal(y.shape, (50,), "y shape mismatch") assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs") From 20d7c3ebc6a303d3dbb869e3708c046ea3e53786 Mon Sep 17 00:00:00 2001 From: Gilles Louppe Date: Thu, 11 Aug 2011 16:40:53 +0200 Subject: [PATCH 15/51] Improved test coverage of samples_generator and feature_extraction modules. More precisely, this commit the following changes: - Improved test coverage of datasets.samples_generator (-> 100%) - Improved test coverage of features_extraction.image (-> 100%) - Fixed a minor bug in features_extraction.image - Improved test coverage of features_extraction.text (-> 99%). The only left uncovered line is `RomanPreprocessor::__repr__`. 
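For context on the `max_patches` fix mentioned in the commit message above, a minimal usage sketch follows. It is not part of the patch itself; the toy image and the `scikits.learn`-era import path are assumptions for illustration, and the expected counts mirror the new tests added below::

    import numpy as np
    from scikits.learn.feature_extraction.image import extract_patches_2d

    image = np.arange(64 * 64, dtype=np.float64).reshape(64, 64)  # toy grayscale image

    # An integer value caps the number of randomly sampled patches.
    patches = extract_patches_2d(image, (8, 8), max_patches=100, random_state=0)
    print patches.shape      # (100, 8, 8)

    # A float in (0, 1) is read as a fraction of all (64 - 8 + 1) ** 2 possible patches.
    patches = extract_patches_2d(image, (8, 8), max_patches=0.5, random_state=0)
    print patches.shape[0]   # int(0.5 * 57 * 57) == 1624

    # Floats outside (0, 1), e.g. 2.0 or -1.0, raise ValueError.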
--- .../datasets/tests/test_samples_generator.py | 14 +++++----- scikits/learn/feature_extraction/image.py | 5 +--- .../feature_extraction/tests/test_image.py | 27 +++++++++++++++++++ .../feature_extraction/tests/test_text.py | 15 +++++++++++ scikits/learn/feature_extraction/text.py | 8 ++---- 5 files changed, 53 insertions(+), 16 deletions(-) diff --git a/scikits/learn/datasets/tests/test_samples_generator.py b/scikits/learn/datasets/tests/test_samples_generator.py index 2a8de347278ac..50e5202ff0fbd 100644 --- a/scikits/learn/datasets/tests/test_samples_generator.py +++ b/scikits/learn/datasets/tests/test_samples_generator.py @@ -1,5 +1,5 @@ import numpy as np -from numpy.testing import assert_equal, assert_almost_equal, \ +from numpy.testing import assert_equal, assert_approx_equal, \ assert_array_almost_equal, assert_array_less from .. import make_classification @@ -32,15 +32,17 @@ def test_make_classification(): def test_make_regression(): - X, y, c = make_regression(n_samples=50, n_features=10, n_informative=3, + X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3, effective_rank=5, coef=True, bias=0.0, - noise=1e-10, random_state=0) + noise=1.0, random_state=0) - assert_equal(X.shape, (50, 10), "X shape mismatch") - assert_equal(y.shape, (50,), "y shape mismatch") + assert_equal(X.shape, (100, 10), "X shape mismatch") + assert_equal(y.shape, (100,), "y shape mismatch") assert_equal(c.shape, (10,), "coef shape mismatch") assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features") - assert_array_almost_equal(y, np.dot(X, c)) + + # Test that y ~= np.dot(X, c) + bias + N(0, 1.0) + assert_approx_equal(np.std(y - np.dot(X, c)), 1.0, significant=2) def test_make_blobs(): diff --git a/scikits/learn/feature_extraction/image.py b/scikits/learn/feature_extraction/image.py index 56685530d9866..0a6525e116241 100644 --- a/scikits/learn/feature_extraction/image.py +++ b/scikits/learn/feature_extraction/image.py @@ -88,9 +88,6 @@ def _to_graph(n_x, n_y, n_z, mask=None, img=None, else: dtype = img.dtype - if dtype == np.bool: - dtype = np.int - if img is not None: img = np.atleast_3d(img) weights = _compute_gradient_3d(edges, img) @@ -247,7 +244,7 @@ def extract_patches_2d(image, patch_size, max_patches=None, random_state=None): if isinstance(max_patches, int) and max_patches < all_patches: n_patches = max_patches elif isinstance(max_patches, float) and 0 < max_patches < 1: - n_patches = max_patches * n_patches + n_patches = int(max_patches * all_patches) else: raise ValueError("Invalid value for max_patches: %r" % max_patches) diff --git a/scikits/learn/feature_extraction/tests/test_image.py b/scikits/learn/feature_extraction/tests/test_image.py index b77fdb97069d2..7729577e6300e 100644 --- a/scikits/learn/feature_extraction/tests/test_image.py +++ b/scikits/learn/feature_extraction/tests/test_image.py @@ -7,6 +7,7 @@ from scipy import ndimage from nose.tools import assert_equal +from numpy.testing import assert_raises from ..image import img_to_graph, grid_to_graph from ..image import extract_patches_2d, reconstruct_from_patches_2d, \ @@ -37,12 +38,21 @@ def test_grid_to_graph(): mask[-roi_size:, -roi_size:] = True mask = mask.reshape(size ** 2) A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray) + assert(cs_graph_components(A)[0] == 2) # Checking that the function works whatever the type of mask is mask = np.ones((size, size), dtype=np.int16) A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask) assert(cs_graph_components(A)[0] == 1) + # 
Checking dtype of the graph + mask = np.ones((size, size)) + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.bool) + assert A.dtype == np.bool + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.int) + assert A.dtype == np.int + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.float) + assert A.dtype == np.float def test_connect_regions(): lena = sp.lena() @@ -133,6 +143,17 @@ def test_extract_patches_max_patches(): patches = extract_patches_2d(lena, (p_h, p_w), max_patches=100) assert_equal(patches.shape, (100, p_h, p_w)) + expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1)) + patches = extract_patches_2d(lena, (p_h, p_w), max_patches=0.5) + assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + + assert_raises(ValueError, extract_patches_2d, lena, + (p_h, p_w), + max_patches=2.0) + assert_raises(ValueError, extract_patches_2d, lena, + (p_h, p_w), + max_patches=-1.0) + def test_reconstruct_patches_perfect(): lena = downsampled_lena @@ -152,6 +173,12 @@ def test_reconstruct_patches_perfect_color(): np.testing.assert_array_equal(lena, lena_reconstructed) +def test_patch_extractor_fit(): + lenas = lena_collection + extr = PatchExtractor(patch_size=(8, 8), max_patches=100, random_state=0) + assert extr == extr.fit(lenas) + + def test_patch_extractor_max_patches(): lenas = lena_collection extr = PatchExtractor(patch_size=(8, 8), max_patches=100, random_state=0) diff --git a/scikits/learn/feature_extraction/tests/test_text.py b/scikits/learn/feature_extraction/tests/test_text.py index de8e16de08903..429c54b283f0e 100644 --- a/scikits/learn/feature_extraction/tests/test_text.py +++ b/scikits/learn/feature_extraction/tests/test_text.py @@ -16,8 +16,10 @@ assert_false, assert_not_equal from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal +from numpy.testing import assert_raises import pickle +from StringIO import StringIO JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", @@ -100,6 +102,11 @@ def test_word_analyzer_unigrams(): u'yesterday'] assert_equal(wa.analyze(text), expected) + text = StringIO("This is a test with a file-like object!") + expected = [u'this', u'is', u'test', u'with', u'file', u'like', + u'object'] + assert_equal(wa.analyze(text), expected) + def test_word_analyzer_unigrams_and_bigrams(): wa = WordNGramAnalyzer(min_n=1, max_n=2, stop_words=None) @@ -127,6 +134,10 @@ def test_char_ngram_analyzer(): expected = [u' yeste', u'yester', u'esterd', u'sterda', u'terday'] assert_equal(cnga.analyze(text)[-5:], expected) + text = StringIO("This is a test with a file-like object!") + expected = [u'thi', u'his', u'is ', u's i', u' is'] + assert_equal(cnga.analyze(text)[:5], expected) + def test_countvectorizer_custom_vocabulary(): what_we_like = ["pizza", "beer"] @@ -224,6 +235,10 @@ def test_vectorizer(): tfidf_test2 = toarray(tv.transform(test_data)) assert_array_almost_equal(tfidf_test, tfidf_test2) + # test empty vocabulary + v3 = CountVectorizer(vocabulary=None) + assert_raises(ValueError, v3.transform, train_data) + def test_vectorizer_max_features(): vec_factories = ( diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text.py index 5872e446680f6..35f1aeeb3e1ce 100644 --- a/scikits/learn/feature_extraction/text.py +++ b/scikits/learn/feature_extraction/text.py @@ -148,9 +148,7 @@ def analyze(self, text_document): original_tokens = tokens tokens = [] n_original_tokens = len(original_tokens) - for n in xrange(self.min_n, 
self.max_n + 1): - if n_original_tokens < n: - continue + for n in xrange(self.min_n, min(self.max_n + 1, n_original_tokens)): for i in xrange(n_original_tokens - n + 1): tokens.append(u" ".join(original_tokens[i: i + n])) @@ -196,9 +194,7 @@ def analyze(self, text_document): text_len = len(text_document) ngrams = [] - for n in xrange(self.min_n, self.max_n + 1): - if text_len < n: - continue + for n in xrange(self.min_n, min(self.max_n + 1, text_len)): for i in xrange(text_len - n + 1): ngrams.append(text_document[i: i + n]) return ngrams From 345eeaa9c978ad67daabefa84c2a53c33605574c Mon Sep 17 00:00:00 2001 From: Gilles Louppe Date: Thu, 11 Aug 2011 16:46:02 +0200 Subject: [PATCH 16/51] Fixed a small introduced due to a previous commit. --- scikits/learn/feature_extraction/text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text.py index 35f1aeeb3e1ce..1f0d8fd3c0e24 100644 --- a/scikits/learn/feature_extraction/text.py +++ b/scikits/learn/feature_extraction/text.py @@ -148,7 +148,7 @@ def analyze(self, text_document): original_tokens = tokens tokens = [] n_original_tokens = len(original_tokens) - for n in xrange(self.min_n, min(self.max_n + 1, n_original_tokens)): + for n in xrange(self.min_n, min(self.max_n + 1, n_original_tokens + 1)): for i in xrange(n_original_tokens - n + 1): tokens.append(u" ".join(original_tokens[i: i + n])) @@ -194,7 +194,7 @@ def analyze(self, text_document): text_len = len(text_document) ngrams = [] - for n in xrange(self.min_n, min(self.max_n + 1, text_len)): + for n in xrange(self.min_n, min(self.max_n + 1, text_len + 1)): for i in xrange(text_len - n + 1): ngrams.append(text_document[i: i + n]) return ngrams From fd176ae17765560e594bb55c6fc098445ef0ec16 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Fri, 12 Aug 2011 14:56:24 +0200 Subject: [PATCH 17/51] Added a function as_float_array in scikits.learn.utils This function converts a numpy array to dtype np.float (64 or 32, depending of the original type). It also takes an optional argument overwrite_X. Thus, _center_data no more takes overwrite_X as an argument, and now modify X inplace. --- scikits/learn/linear_model/base.py | 21 ++++++++----- scikits/learn/linear_model/bayes.py | 8 +++-- .../learn/linear_model/coordinate_descent.py | 15 +++++---- scikits/learn/linear_model/least_angle.py | 8 +++-- scikits/learn/linear_model/omp.py | 4 ++- scikits/learn/linear_model/ridge.py | 11 ++++--- scikits/learn/utils/__init__.py | 31 +++++++++++++++++++ 7 files changed, 74 insertions(+), 24 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 7cf8cefeee845..b90eb151a4373 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -18,6 +18,7 @@ from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from ..utils.extmath import safe_sparse_dot from ..utils import safe_asanyarray +from ..utils import as_float_array ### @@ -48,25 +49,27 @@ def predict(self, X): return safe_sparse_dot(X, self.coef_.T) + self.intercept_ @staticmethod - def _center_data(X, y, fit_intercept, normalize=False, overwrite_X=False): + def _center_data(X, y, fit_intercept, normalize=False): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. + + WARNING : This function modifies X inplace : + Use scikits.learn.utils.as_float_array before to convert X to np.float. 
+ You can specify an argument overwrite_X (default is False). """ if fit_intercept: if scipy.sparse.issparse(X): X_mean = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) else: - if not overwrite_X: - X = X.copy() - - X_mean = X.mean(axis=0) - X = X - X_mean + X_mean = X.mean(axis=0, dtype=np.float) + X = X.astype(np.float) + X -= X_mean if normalize: X_std = np.sqrt(np.sum(X ** 2, axis=0)) X_std[X_std==0] = 1 - X = X / X_std + X /= X_std else: X_std = np.ones(X.shape[1]) y_mean = y.mean() @@ -136,8 +139,10 @@ def fit(self, X, y, **params): X = np.asanyarray(X) y = np.asanyarray(y) + X = as_float_array(X, self.overwrite_X) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, - self.fit_intercept, self.normalize, self.overwrite_X) + self.fit_intercept, self.normalize) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 340c0908acc26..f51bb14b4c12f 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -10,6 +10,7 @@ from scipy import linalg from .base import LinearModel +from ..utils import as_float_array from ..utils.extmath import fast_logdet @@ -147,8 +148,9 @@ def fit(self, X, y, **params): self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) + X = as_float_array(X, self.overwrite_X) X, y, X_mean, y_mean, X_std = self._center_data(X, y, - self.fit_intercept, self.normalize, self.overwrite_X) + self.fit_intercept, self.normalize) n_samples, n_features = X.shape ### Initialization of the values of the parameters @@ -378,8 +380,10 @@ def fit(self, X, y, **params): n_samples, n_features = X.shape coef_ = np.zeros(n_features) + X = as_float_array(X, self.overwrite_X) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize, self.overwrite_X) + self.normalize) ### Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index a5d083af1f0a0..b4c976d07d460 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -8,6 +8,7 @@ import numpy as np from .base import LinearModel +from ..utils import as_float_array from ..cross_val import check_cv from . import cd_fast @@ -121,9 +122,10 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): self._set_params(**params) X = np.asanyarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) + X = as_float_array(X, self.overwrite_X) - X, y, Xmean, ymean, Xstd = self._center_data(X, y, self.fit_intercept, - self.normalize, self.overwrite_X) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, + self.normalize) if coef_init is None: self.coef_ = np.zeros(X.shape[1], dtype=np.float64) @@ -157,7 +159,7 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): cd_fast.enet_coordinate_descent_gram(self.coef_, alpha, beta, Gram, Xy, y, self.max_iter, self.tol) - self._set_intercept(Xmean, ymean, Xstd) + self._set_intercept(X_mean, y_mean, X_std) if self.dual_gap_ > self.eps_: warnings.warn('Objective did not converge, you might want' @@ -347,8 +349,9 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, ----- See examples/plot_lasso_coordinate_descent_path.py for an example. 
""" - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, - fit_intercept, normalize, overwrite_X) + X = as_float_array(X, overwrite_X) + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, + fit_intercept, normalize) X = np.asfortranarray(X) # make data contiguous in memory n_samples = X.shape[0] @@ -374,7 +377,7 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, model.fit(X, y, coef_init=coef_, **fit_params) if fit_intercept: model.fit_intercept = True - model._set_intercept(Xmean, ymean, Xstd) + model._set_intercept(X_mean, y_mean, X_std) if verbose: print model coef_ = model.coef_.copy() diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index abbdf74df7faf..ade653090b3a8 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -13,7 +13,7 @@ from scipy.linalg.lapack import get_lapack_funcs from .base import LinearModel -from ..utils import arrayfuncs +from ..utils import arrayfuncs, as_float_array from ..utils import deprecated from ..cross_val import check_cv from ..externals.joblib import Parallel, delayed @@ -343,7 +343,7 @@ class Lars(LinearModel): -------- >>> from scikits.learn import linear_model >>> clf = linear_model.Lars(n_nonzero_coefs=1) - >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS + >>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) >>> print clf.coef_ @@ -390,8 +390,10 @@ def fit(self, X, y, **params): X = np.atleast_2d(X) y = np.atleast_1d(y) + X = as_float_array(X, self.overwrite_X) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize, self.overwrite_X) + self.normalize) alpha = getattr(self, 'alpha', 0.) if hasattr(self, 'n_nonzero_coefs'): alpha = 0. # n_nonzero_coefs parametrization takes priority diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index 95e6fbe97caa0..36544c1c54bf7 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -13,6 +13,7 @@ from .base import LinearModel from ..utils.arrayfuncs import solve_triangular +from ..utils import as_float_array premature = """ Orthogonal matching pursuit ended prematurely due to linear dependence in the dictionary. The requested precision might not have been met. 
@@ -503,9 +504,10 @@ def fit(self, X, y, Gram=None, Xy=None, **params): X = np.atleast_2d(X) y = np.atleast_1d(y) n_features = X.shape[1] + X = as_float_array(X, self.overwrite_X) X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize, self.overwrite_X) + self.normalize) if self.n_nonzero_coefs == None and self.eps is None: self.n_nonzero_coefs = int(0.1 * n_features) diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py index 4b4c30ec5ef6f..575033b325b75 100644 --- a/scikits/learn/linear_model/ridge.py +++ b/scikits/learn/linear_model/ridge.py @@ -9,7 +9,7 @@ from .base import LinearModel from ..utils.extmath import safe_sparse_dot -from ..utils import safe_asanyarray +from ..utils import safe_asanyarray, as_float_array from ..preprocessing import LabelBinarizer from ..grid_search import GridSearchCV @@ -206,10 +206,11 @@ def fit(self, X, y, sample_weight=1.0, solver='auto', **params): X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) + X = as_float_array(X, self.overwrite_X) X, y, X_mean, y_mean, X_std = \ self._center_data(X, y, self.fit_intercept, - self.normalize, self.overwrite_X) + self.normalize) self.coef_ = ridge_regression(X, y, self.alpha, sample_weight, solver, self.tol) @@ -336,13 +337,14 @@ class _RidgeGCV(LinearModel): http://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf """ - def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, - normalize=False, score_func=None, loss_func=None): + def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, normalize=False, + score_func=None, loss_func=None, overwrite_X=False): self.alphas = np.asanyarray(alphas) self.fit_intercept = fit_intercept self.normalize = normalize self.score_func = score_func self.loss_func = loss_func + self.overwrite_X = overwrite_X def _pre_compute(self, X, y): # even if X is very sparse, K is usually very dense @@ -400,6 +402,7 @@ def fit(self, X, y, sample_weight=1.0): y = np.asanyarray(y, dtype=np.float) n_samples = X.shape[0] + X = as_float_array(X, self.overwrite_X) X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index ec31dc4e61da8..31b2e7ddc4d72 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -23,6 +23,37 @@ def safe_asanyarray(X, dtype=None, order=None): return X +def as_float_array(X, overwrite_X=False): + """ + Converts a numpy array to type np.float + + The new dtype will be float32 or np.float64, + depending on the original type. + The function can create a copy or modify the argument depending + of the argument overwrite_X + + Parameters + ---------- + X : numpy array + + overwrite_X : bool, optional + if False, a copy of X will be created + + Returns + ------- + An array X of type np.float + """ + if X.dtype == np.int32: + array_type = np.float32 + else: + array_type = np.float64 + if not overwrite_X: + X = X.astype(array_type) + else: + X = np.asanyarray(X, array_type) + return X + + def atleast2d_or_csr(X): """Like numpy.atleast_2d, but converts sparse matrices to CSR format""" X = X.tocsr() if sp.issparse(X) else np.atleast_2d(X) From 0dd22e5f965426aaaeb4581dc61cb71093a44e46 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Fri, 12 Aug 2011 15:04:41 +0200 Subject: [PATCH 18/51] Fix : deleted a forgotten line I had forgotten a none wanted line in the code, I removed it. 
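As a point of reference for the `as_float_array` helper introduced in the patch above, a minimal usage sketch is given here. It is illustrative only; the integer test array is made up, the import path follows the `scikits.learn`-era layout, and the dtype mapping (int32 to float32, otherwise float64) is the one documented in the helper's docstring and exercised by the tests added later in this series::

    import numpy as np
    from scikits.learn.utils import as_float_array

    X = np.ones((3, 10), dtype=np.int32)

    # With overwrite_X=False a float copy is returned and the caller's X is untouched.
    X_float = as_float_array(X, overwrite_X=False)
    print X_float.dtype   # float32, because the input was int32
    print X.dtype         # still int32

    # 64-bit integer input is promoted to float64 instead.
    print as_float_array(X.astype(np.int64), overwrite_X=False).dtype   # float64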
--- scikits/learn/linear_model/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index b90eb151a4373..90dd8d68db3dd 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -63,8 +63,7 @@ def _center_data(X, y, fit_intercept, normalize=False): X_mean = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) else: - X_mean = X.mean(axis=0, dtype=np.float) - X = X.astype(np.float) + X_mean = X.mean(axis=0) X -= X_mean if normalize: X_std = np.sqrt(np.sum(X ** 2, axis=0)) From 42a6ebb497d0c4d82aec728b97c175e5067f5c73 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 16 Aug 2011 20:02:55 +0200 Subject: [PATCH 19/51] DOC: section level fix in clustering doc --- doc/modules/clustering.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 7e63ce4677a08..b00616bfd6c15 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -394,7 +394,7 @@ homogeneous but not complete:: Mathematical formulation ------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~ Homogeneity and completeness scores are formally given by: From 1c123c62728870f1e39a4921d0714951741dd296 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 17 Aug 2011 18:41:05 +0200 Subject: [PATCH 20/51] FIX : corrected a bug in as_float_array and added a test function The function was copying X even when overwrite_X was True. --- scikits/learn/utils/__init__.py | 2 +- scikits/learn/utils/tests/test___init__.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 scikits/learn/utils/tests/test___init__.py diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index 31b2e7ddc4d72..2da931c8cf341 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -50,7 +50,7 @@ def as_float_array(X, overwrite_X=False): if not overwrite_X: X = X.astype(array_type) else: - X = np.asanyarray(X, array_type) + X.dtype = array_type return X diff --git a/scikits/learn/utils/tests/test___init__.py b/scikits/learn/utils/tests/test___init__.py new file mode 100644 index 0000000000000..a4a3f3de099f0 --- /dev/null +++ b/scikits/learn/utils/tests/test___init__.py @@ -0,0 +1,21 @@ +import numpy as np +from .. 
import as_float_array + +def test_as_float_array(): + """ + Test function for as_float_array + """ + # Checks that the return type is ok + X = np.ones((3, 10), dtype=np.int32) + X = X + np.arange(10, dtype=np.int32) + X2 = as_float_array(X, overwrite_X=True) + np.testing.assert_equal(X2.dtype, np.float32) + # Checking that the array was actually overwritten + np.testing.assert_equal(X.dtype, np.float32) + # Another test + X = X.astype(np.int64) + X2 = as_float_array(X, overwrite_X=False) + # Checking that the array wasn't overwritten + np.testing.assert_equal(X.dtype, np.int64) + # Checking that the new type is ok + np.testing.assert_equal(X2.dtype, np.float64) From 468c5536a18e0bd21c3dbb0ea182622a9a1d2f86 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 17 Aug 2011 19:00:57 +0200 Subject: [PATCH 21/51] PEP8 : replaced tabulations by spaces --- scikits/learn/utils/tests/test___init__.py | 35 +++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/scikits/learn/utils/tests/test___init__.py b/scikits/learn/utils/tests/test___init__.py index a4a3f3de099f0..cc373806194c2 100644 --- a/scikits/learn/utils/tests/test___init__.py +++ b/scikits/learn/utils/tests/test___init__.py @@ -1,21 +1,22 @@ import numpy as np from .. import as_float_array + def test_as_float_array(): - """ - Test function for as_float_array - """ - # Checks that the return type is ok - X = np.ones((3, 10), dtype=np.int32) - X = X + np.arange(10, dtype=np.int32) - X2 = as_float_array(X, overwrite_X=True) - np.testing.assert_equal(X2.dtype, np.float32) - # Checking that the array was actually overwritten - np.testing.assert_equal(X.dtype, np.float32) - # Another test - X = X.astype(np.int64) - X2 = as_float_array(X, overwrite_X=False) - # Checking that the array wasn't overwritten - np.testing.assert_equal(X.dtype, np.int64) - # Checking that the new type is ok - np.testing.assert_equal(X2.dtype, np.float64) + """ + Test function for as_float_array + """ + # Checks that the return type is ok + X = np.ones((3, 10), dtype=np.int32) + X = X + np.arange(10, dtype=np.int32) + X2 = as_float_array(X, overwrite_X=True) + np.testing.assert_equal(X2.dtype, np.float32) + # Checking that the array was actually overwritten + np.testing.assert_equal(X.dtype, np.float32) + # Another test + X = X.astype(np.int64) + X2 = as_float_array(X, overwrite_X=False) + # Checking that the array wasn't overwritten + np.testing.assert_equal(X.dtype, np.int64) + # Checking that the new type is ok + np.testing.assert_equal(X2.dtype, np.float64) From ef553808bf467a28aa572769a9cf78db47838979 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 17 Aug 2011 22:10:42 +0200 Subject: [PATCH 22/51] FIX : if X is already of the good type, we musn't modify it Added a verification in the test --- scikits/learn/utils/__init__.py | 2 +- scikits/learn/utils/tests/test___init__.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index 2da931c8cf341..310159784b6db 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -43,7 +43,7 @@ def as_float_array(X, overwrite_X=False): ------- An array X of type np.float """ - if X.dtype == np.int32: + if (X.dtype == np.int32) or (X.dtype == np.float32): array_type = np.float32 else: array_type = np.float64 diff --git a/scikits/learn/utils/tests/test___init__.py b/scikits/learn/utils/tests/test___init__.py index cc373806194c2..001ee284f4e34 100644 --- 
a/scikits/learn/utils/tests/test___init__.py +++ b/scikits/learn/utils/tests/test___init__.py @@ -6,17 +6,17 @@ def test_as_float_array(): """ Test function for as_float_array """ - # Checks that the return type is ok X = np.ones((3, 10), dtype=np.int32) X = X + np.arange(10, dtype=np.int32) + # Checking that the array was actually overwritten + assert as_float_array(X, overwrite_X=True) is X + # Checks that the return type is ok X2 = as_float_array(X, overwrite_X=True) np.testing.assert_equal(X2.dtype, np.float32) - # Checking that the array was actually overwritten - np.testing.assert_equal(X.dtype, np.float32) # Another test X = X.astype(np.int64) X2 = as_float_array(X, overwrite_X=False) # Checking that the array wasn't overwritten - np.testing.assert_equal(X.dtype, np.int64) + assert as_float_array(X, False) is not X # Checking that the new type is ok np.testing.assert_equal(X2.dtype, np.float64) From e77ce26b77839db8de539a359cfa27b4b49879f8 Mon Sep 17 00:00:00 2001 From: Mathieu Blondel Date: Sat, 20 Aug 2011 10:24:11 +0900 Subject: [PATCH 23/51] Typo. Just trying out github's new web text editor (press "e" on the file you want to edit). --- scikits/learn/preprocessing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikits/learn/preprocessing/__init__.py b/scikits/learn/preprocessing/__init__.py index 63606d8067bbe..323ae71c6eb21 100644 --- a/scikits/learn/preprocessing/__init__.py +++ b/scikits/learn/preprocessing/__init__.py @@ -19,7 +19,7 @@ def _mean_and_std(X, axis=0, with_mean=True, with_std=True): """Compute mean and std dev for centering, scaling - Zero valued std components are reseted to 1.0 to avoid NaNs when scaling. + Zero valued std components are reset to 1.0 to avoid NaNs when scaling. """ X = np.asanyarray(X) Xr = np.rollaxis(X, axis) From a0e0e3a1f3c963b354539b53f66554500613a384 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sat, 20 Aug 2011 18:02:20 +0200 Subject: [PATCH 24/51] In spectral clustering example, forced the solver to be arpack (the amg solver is unstable for this example, and results are not satisfying) --- examples/cluster/plot_segmentation_toy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 8ae07f9c9b312..e810c25b2bfd4 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -68,7 +68,9 @@ # dependant from the gradient the segmentation is close to a voronoi graph.data = np.exp(-graph.data/graph.data.std()) -labels = spectral_clustering(graph, k=4) +# Force the solver to be arpack, since amg is numerically +# unstable on this example +labels = spectral_clustering(graph, k=4, mode='arpack') label_im = -np.ones(mask.shape) label_im[mask] = labels @@ -86,7 +88,7 @@ graph = image.img_to_graph(img, mask=mask) graph.data = np.exp(-graph.data/graph.data.std()) -labels = spectral_clustering(graph, k=2) +labels = spectral_clustering(graph, k=2, mode='arpack') label_im = -np.ones(mask.shape) label_im[mask] = labels From 097c3d23a0ce44466d9ecc5c8bc4416b90d53584 Mon Sep 17 00:00:00 2001 From: Gael varoquaux Date: Sat, 20 Aug 2011 21:34:33 +0200 Subject: [PATCH 25/51] ENH: More interesting benchmarks for OMP --- benchmarks/bench_plot_omp_lars.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index f731e2041c2b8..a3d725e4bc62b 100644 --- 
a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -98,8 +98,8 @@ def compute_bench(samples_range, features_range): if __name__ == '__main__': - samples_range = np.linspace(1000, 3000, 5).astype(np.int) - features_range = np.linspace(1000, 3000, 5).astype(np.int) + samples_range = np.linspace(1000, 5000, 5).astype(np.int) + features_range = np.linspace(1000, 5000, 5).astype(np.int) results = compute_bench(samples_range, features_range) max_time = max(np.max(t) for t in results.itervalues()) @@ -107,7 +107,8 @@ def compute_bench(samples_range, features_range): fig = pl.figure() for i, (label, timings) in enumerate(sorted(results.iteritems())): ax = fig.add_subplot(1, 2, i) - pl.matshow(timings, fignum=False, cmap='OrRd') + vmax = max(1 - timings.min(), -1 + timings.max()) + pl.matshow(timings, fignum=False, vmin=1-vmax, vmax=1+vmax) ax.set_xticklabels([''] + map(str, samples_range)) ax.set_yticklabels([''] + map(str, features_range)) pl.xlabel('n_samples') From fd3d95facf2ab59fc8e6a2e765b529a927b4d55e Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Sun, 21 Aug 2011 16:06:04 +0200 Subject: [PATCH 26/51] FIX : if X.dtype is changed, then a copy of X is returned, even if overwrite_X is True The test was updated --- scikits/learn/utils/__init__.py | 18 +++++++++++------- scikits/learn/utils/tests/test___init__.py | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index 310159784b6db..e5185aea904d2 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -31,6 +31,9 @@ def as_float_array(X, overwrite_X=False): depending on the original type. The function can create a copy or modify the argument depending of the argument overwrite_X + + WARNING : If X is not of type float, then a copy of X with the wright type + will be returned Parameters ---------- @@ -43,14 +46,15 @@ def as_float_array(X, overwrite_X=False): ------- An array X of type np.float """ - if (X.dtype == np.int32) or (X.dtype == np.float32): - array_type = np.float32 - else: - array_type = np.float64 - if not overwrite_X: - X = X.astype(array_type) + if X.dtype in [np.float32, np.float64]: + if overwrite_X: + return X + else: + return X.copy() + if X.dtype == np.int32: + X = X.astype(np.float32) else: - X.dtype = array_type + X = X.astype(np.float64) return X diff --git a/scikits/learn/utils/tests/test___init__.py b/scikits/learn/utils/tests/test___init__.py index 001ee284f4e34..fc0368ece2374 100644 --- a/scikits/learn/utils/tests/test___init__.py +++ b/scikits/learn/utils/tests/test___init__.py @@ -8,8 +8,6 @@ def test_as_float_array(): """ X = np.ones((3, 10), dtype=np.int32) X = X + np.arange(10, dtype=np.int32) - # Checking that the array was actually overwritten - assert as_float_array(X, overwrite_X=True) is X # Checks that the return type is ok X2 = as_float_array(X, overwrite_X=True) np.testing.assert_equal(X2.dtype, np.float32) @@ -20,3 +18,6 @@ def test_as_float_array(): assert as_float_array(X, False) is not X # Checking that the new type is ok np.testing.assert_equal(X2.dtype, np.float64) + # Here, X is of the right type, it shouldn't be modified + X = np.ones((3, 2), dtype=np.float32) + assert as_float_array(X, overwrite_X=True) is X From 6c6289f2c61f1e6f26b552744ad323299699186e Mon Sep 17 00:00:00 2001 From: Gael varoquaux Date: Sun, 21 Aug 2011 18:05:08 +0200 Subject: [PATCH 27/51] API: eps -> tol in bayes This is not a numerical error, but a tolerance in the optimization 
loop --- scikits/learn/linear_model/bayes.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 4a263801c19e3..0fd1424ff979c 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -33,7 +33,7 @@ class BayesianRidge(LinearModel): n_iter : int, optional Maximum number of iterations. Default is 300. - eps : float, optional + tol : float, optional Stop the algorithm if w has converged. Default is 1.e-3. alpha_1 : float, optional @@ -92,7 +92,7 @@ class BayesianRidge(LinearModel): >>> clf = linear_model.BayesianRidge() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06, + fit_intercept=True, alpha_2=1e-06, tol=0.001, alpha_1=1e-06, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -102,11 +102,11 @@ class BayesianRidge(LinearModel): See examples/linear_model/plot_bayesian_ridge.py for an example. """ - def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, + def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, fit_intercept=True, verbose=False): self.n_iter = n_iter - self.eps = eps + self.tol = tol self.alpha_1 = alpha_1 self.alpha_2 = alpha_2 self.lambda_1 = lambda_1 @@ -196,7 +196,7 @@ def fit(self, X, y, **params): self.scores_.append(s) ### Check for convergence - if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.eps: + if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print "Convergence after ", str(iter_), " iterations" break @@ -234,7 +234,7 @@ class ARDRegression(LinearModel): n_iter : int, optional Maximum number of iterations. Default is 300 - eps : float, optional + tol : float, optional Stop the algorithm if w has converged. Default is 1.e-3. alpha_1 : float, optional @@ -301,8 +301,8 @@ class ARDRegression(LinearModel): >>> clf = linear_model.ARDRegression() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) ARDRegression(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, threshold_lambda=10000.0, - alpha_2=1e-06, alpha_1=1e-06, compute_score=False) + fit_intercept=True, threshold_lambda=10000.0, alpha_2=1e-06, + tol=0.001, alpha_1=1e-06, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -311,11 +311,11 @@ class ARDRegression(LinearModel): See examples/linear_model/plot_ard.py for an example. 
""" - def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, + def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, threshold_lambda=1.e+4, fit_intercept=True, verbose=False): self.n_iter = n_iter - self.eps = eps + self.tol = tol self.fit_intercept = fit_intercept self.alpha_1 = alpha_1 self.alpha_2 = alpha_2 @@ -407,7 +407,7 @@ def fit(self, X, y, **params): self.scores_.append(s) ### Check for convergence - if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.eps: + if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print "Converged after %s iterations" % iter_ break From d402deb79c205555824a5709f404f9492c2e506a Mon Sep 17 00:00:00 2001 From: vene Date: Sun, 21 Aug 2011 23:09:27 +0200 Subject: [PATCH 28/51] FIX: BayesianRidge doctest --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e718a52165d03..3fab5008aa43c 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -466,7 +466,7 @@ By default :math:`\alpha_1 = \alpha_2 = \lambda_1 = \lambda_2 = 1.e^{-6}`, *i.e >>> clf = linear_model.BayesianRidge() >>> clf.fit (X, Y) BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06, + fit_intercept=True, alpha_2=1e-06, tol=0.001, alpha_1=1e-06, compute_score=False) After being fitted, the model can then be used to predict new values:: From ecee39a3b6de527ee13d202bcdec5a653e6c3b9d Mon Sep 17 00:00:00 2001 From: Tim Sheerman-Chase Date: Mon, 22 Aug 2011 10:42:50 +0100 Subject: [PATCH 29/51] FIX: Corrected NuSVR impl type and set epsilon to None --- scikits/learn/svm/classes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scikits/learn/svm/classes.py b/scikits/learn/svm/classes.py index c2db5a0e2ff01..a62de3a6716b8 100644 --- a/scikits/learn/svm/classes.py +++ b/scikits/learn/svm/classes.py @@ -275,7 +275,7 @@ class SVR(BaseLibSVM, RegressorMixin): penalty parameter C of the error term. epsilon : float, optional (default=0.1) - epsilon in the epsilon-SVR model. It specifies the espilon-tube + epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value. @@ -455,9 +455,9 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=1e-3): - BaseLibSVM.__init__(self, 'epsilon_svr', kernel, degree, + BaseLibSVM.__init__(self, 'nu_svr', kernel, degree, gamma, coef0, tol, C, nu, - 0.0, shrinking, probability) + None, shrinking, probability) def fit(self, X, y, sample_weight=None, **params): """ From 9e0b551c54c302f2fe30120967b07c14a4ac4619 Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa Date: Tue, 23 Aug 2011 11:07:49 +0200 Subject: [PATCH 30/51] Refactor tests for SVR. Guys, I'm back ... 
--- scikits/learn/svm/tests/test_svm.py | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/scikits/learn/svm/tests/test_svm.py b/scikits/learn/svm/tests/test_svm.py index ded6936b71f97..2b032d000c5b4 100644 --- a/scikits/learn/svm/tests/test_svm.py +++ b/scikits/learn/svm/tests/test_svm.py @@ -140,29 +140,11 @@ def test_SVR(): Test Support Vector Regression """ - clf = svm.SVR(kernel='linear') - clf.fit(X, Y) - pred = clf.predict(T) - - assert_array_almost_equal(clf.dual_coef_, [[-0.1, 0.1]]) - assert_array_almost_equal(clf.coef_, [[0.2, 0.2]]) - assert_array_almost_equal(clf.support_vectors_, [[-1, -1], [1, 1]]) - assert_array_equal(clf.support_, [1, 3]) - assert_array_almost_equal(clf.intercept_, [1.5]) - assert_array_almost_equal(pred, [1.1, 2.3, 2.5]) - - # the same with kernel='rbf' - clf = svm.SVR(kernel='rbf') - clf.fit(X, Y) - pred = clf.predict(T) - - assert_array_almost_equal(clf.dual_coef_, - [[-0.014, -0.515, -0.013, 0.515, 0.013, 0.013]], - decimal=3) - assert_raises(NotImplementedError, lambda: clf.coef_) - assert_array_almost_equal(clf.support_vectors_, X) - assert_array_almost_equal(clf.intercept_, [1.49997261]) - assert_array_almost_equal(pred, [1.10001274, 1.86682485, 1.73300377]) + diabetes = datasets.load_diabetes() + for clf in (svm.NuSVR(kernel='linear', nu=.4), + svm.SVR(kernel='linear', C=10.)): + clf.fit(diabetes.data, diabetes.target) + assert clf.score(diabetes.data, diabetes.target) > 0.02 def test_oneclass(): From ba3e2dc76147d68c45a2db337c6f05ba14da4efb Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa Date: Tue, 23 Aug 2011 11:08:56 +0200 Subject: [PATCH 31/51] Correct NuSVR impl in the sparse case. --- scikits/learn/svm/sparse/classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikits/learn/svm/sparse/classes.py b/scikits/learn/svm/sparse/classes.py index 004b65430d196..cbd5697316848 100644 --- a/scikits/learn/svm/sparse/classes.py +++ b/scikits/learn/svm/sparse/classes.py @@ -142,7 +142,7 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, epsilon=0.1, probability=False, tol=1e-3): - SparseBaseLibSVM.__init__(self, 'epsilon_svr', kernel, + SparseBaseLibSVM.__init__(self, 'nu_svr', kernel, degree, gamma, coef0, tol, C, nu, epsilon, shrinking, probability) From 9c9e03758148fcb556f31a0cfe6b4a4361627dbd Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa Date: Tue, 23 Aug 2011 11:11:28 +0200 Subject: [PATCH 32/51] Add tests for last commit. 
--- scikits/learn/svm/tests/test_svm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scikits/learn/svm/tests/test_svm.py b/scikits/learn/svm/tests/test_svm.py index 2b032d000c5b4..6f200757c3c89 100644 --- a/scikits/learn/svm/tests/test_svm.py +++ b/scikits/learn/svm/tests/test_svm.py @@ -142,7 +142,9 @@ def test_SVR(): diabetes = datasets.load_diabetes() for clf in (svm.NuSVR(kernel='linear', nu=.4), - svm.SVR(kernel='linear', C=10.)): + svm.SVR(kernel='linear', C=10.), + svm.sparse.NuSVR(kernel='linear', nu=.4), + svm.sparse.SVR(kernel='linear', C=10.)): clf.fit(diabetes.data, diabetes.target) assert clf.score(diabetes.data, diabetes.target) > 0.02 From e73dce95ed384e739d9b39477da66408eea661bf Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 23 Aug 2011 12:31:25 +0200 Subject: [PATCH 33/51] Test : lasso_lars_vs_lasso_* Added a same test as the existing one, with normalization --- .../linear_model/tests/test_least_angle.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/scikits/learn/linear_model/tests/test_least_angle.py b/scikits/learn/linear_model/tests/test_least_angle.py index 5fa22f4c7f68a..f26871f27e444 100644 --- a/scikits/learn/linear_model/tests/test_least_angle.py +++ b/scikits/learn/linear_model/tests/test_least_angle.py @@ -118,6 +118,16 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): err = np.linalg.norm(clf1.coef_ - clf2.coef_) assert err < 1e-3 + # same test, with normalization + alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') + lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True) + for (c, a) in zip(lasso_path.T, alphas): + lasso_cd.alpha = a + lasso_cd.fit(X, y, tol=1e-8) + error = np.linalg.norm(c - lasso_cd.coef_) + assert error < 0.01 + + def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): """ @@ -135,6 +145,18 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) assert error < 0.01 + alphas_min = [10, 0.9, 1e-4] + # same test, with normalization + for alphas_min in alphas_min: + alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', + alpha_min=0.9) + lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True) + lasso_cd.alpha = alphas[-1] + lasso_cd.fit(X, y, tol=1e-8) + error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) + assert error < 0.01 + + def test_lars_add_features(verbose=False): """ From 8674b017811132e7f17957ceee6e7fe71f2cc7f8 Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Wed, 10 Aug 2011 13:57:21 +0200 Subject: [PATCH 34/51] ENH set_params method on BaseEstimator, deprecate estimator params to fit Passing estimator parameters to fit leads to a situation where fit might half-fail when it has set the parameters, but then fails to do input validation. 
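The intent of this change is that data-independent parameters are set either in ``__init__`` or through the new public ``set_params`` method, while ``fit`` only receives data. A minimal sketch of the new calling convention (illustrative only, not part of the patch; it uses the ``KMeans`` signature shown in the ``k_means_.py`` hunks below and made-up toy data)::

    import numpy as np
    from scikits.learn.cluster import KMeans

    X = np.random.randn(60, 2)

    # Deprecated: estimator parameters smuggled in through fit, e.g.
    #     KMeans(init="k-means++").fit(X, k=3)
    # Preferred: parameters go to __init__ (or set_params), fit only sees data.
    km = KMeans(init="k-means++", k=3)
    km.set_params(max_iter=100)    # set_params returns the estimator itself
    km.fit(X)
    print km.cluster_centers_.shape    # (3, 2)

For nested estimators the same method supports the double-underscore syntax, e.g. ``pipe.set_params(svc__C=0.1)``, as exercised in the pipeline and grid-search tests touched below.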
--- doc/developers/index.rst | 5 +++++ examples/svm/plot_svm_anova.py | 2 +- scikits/learn/base.py | 21 +++++++++++++++---- scikits/learn/cluster/dbscan_.py | 2 +- scikits/learn/cluster/k_means_.py | 17 +++++++-------- scikits/learn/cluster/tests/test_dbscan.py | 10 ++++----- .../feature_selection/univariate_selection.py | 4 ++-- scikits/learn/grid_search.py | 2 +- scikits/learn/tests/test_base.py | 4 ++-- scikits/learn/tests/test_cross_val.py | 3 +-- scikits/learn/tests/test_grid_search.py | 7 +++---- scikits/learn/tests/test_naive_bayes.py | 3 ++- scikits/learn/tests/test_pipeline.py | 6 +++--- 13 files changed, 50 insertions(+), 36 deletions(-) diff --git a/doc/developers/index.rst b/doc/developers/index.rst index 23dd7dfff623c..9dd0d9b7bcc77 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -328,6 +328,11 @@ classifier or a regressor. All estimators implement the fit method:: estimator.fit(X, y) +All built-in estimators also have a ``set_params`` method, which sets +data-independent parameters (overriding previous parameter values passed +to ``__init__``). This method is not required for an object to be an +estimator, but is used by the :class:`grid_search.GridSearchCV` class. + Instantiation ^^^^^^^^^^^^^ diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 6187afe801218..100cd0593f67c 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -40,7 +40,7 @@ percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100) for percentile in percentiles: - clf._set_params(anova__percentile=percentile) + clf.set_params(anova__percentile=percentile) # Compute cross-validation score using all CPUs this_scores = cross_val.cross_val_score(clf, X, y, n_jobs=1) score_means.append(this_scores.mean()) diff --git a/scikits/learn/base.py b/scikits/learn/base.py index 255b995240c75..944fa82209129 100644 --- a/scikits/learn/base.py +++ b/scikits/learn/base.py @@ -6,8 +6,10 @@ import inspect import numpy as np from scipy import sparse +import warnings from .metrics import r2_score +from .utils import deprecated ############################################################################### @@ -172,13 +174,17 @@ def _get_params(self, deep=True): out[key] = value return out - def _set_params(self, **params): + def set_params(self, **params): """ Set the parameters of the estimator. The method works on simple estimators as well as on nested objects (such as pipelines). The former have parameters of the - form __ so that the its possible to - update each component of the nested object. + form __ so that it's possible to update + each component of a nested object. 
+ + Returns + ------- + self """ if not params: # Simple optimisation to gain speed (inspect is slow) @@ -198,7 +204,7 @@ def _set_params(self, **params): 'sub parameter %s' % (sub_name, self.__class__.__name__, sub_name) ) - sub_object._set_params(**{sub_name: value}) + sub_object.set_params(**{sub_name: value}) else: # simple objects case assert key in valid_params, ('Invalid parameter %s ' @@ -207,6 +213,13 @@ def _set_params(self, **params): setattr(self, key, value) return self + def _set_params(self, **params): + if params != {}: + warnings.warn("Passing estimator parameters to fit is deprecated;" + " use set_params instead", + category=DeprecationWarning) + return self.set_params(**params) + def __repr__(self): class_name = self.__class__.__name__ return '%s(%s)' % ( diff --git a/scikits/learn/cluster/dbscan_.py b/scikits/learn/cluster/dbscan_.py index 7ba2c0cc680a3..00b68b2d63f2b 100644 --- a/scikits/learn/cluster/dbscan_.py +++ b/scikits/learn/cluster/dbscan_.py @@ -188,7 +188,7 @@ def fit(self, X, **params): Overwrite keywords from __init__. """ - self._set_params(**params) + self.set_params(**params) self.core_sample_indices_, self.labels_ = dbscan(X, **self._get_params()) self.components_ = X[self.core_sample_indices_].copy() diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index b91cee3329240..cc54f0e9222eb 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -482,25 +482,25 @@ def __init__(self, k=8, init='k-means++', n_init=10, max_iter=300, self.random_state = random_state self.copy_x = copy_x - def _check_data(self, X, **params): - """ - Set parameters and check the sample given is larger than k - """ + def _check_data(self, X): + """Verify that the number of samples given is larger than k""" if sp.issparse(X): raise ValueError("K-Means does not support sparse input matrices.") X = np.asanyarray(X) if X.shape[0] < self.k: raise ValueError("n_samples=%d should be larger than k=%d" % ( X.shape[0], self.k)) - self._set_params(**params) return X - def fit(self, X, **params): + def fit(self, X, k=None, **params): """Compute k-means""" self.random_state = check_random_state(self.random_state) X = self._check_data(X, **params) + if k != None: + self.k = k + self._set_params(**params) self.cluster_centers_, self.labels_, self.inertia_ = k_means( X, k=self.k, init=self.init, n_init=self.n_init, @@ -647,7 +647,7 @@ def __init__(self, k=8, init='random', max_iter=100, self.cluster_centers_ = None self.chunk_size = chunk_size - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """ Calculates the centroids on a batch X @@ -656,7 +656,6 @@ def fit(self, X, y=None, **params): X: array-like, shape = [n_samples, n_features] Coordinates of the data points to cluster """ - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = check_arrays(X, sparse_format="csr", copy=False)[0] n_samples, n_features = X.shape @@ -709,7 +708,7 @@ def fit(self, X, y=None, **params): return self - def partial_fit(self, X, y=None, **params): + def partial_fit(self, X, y=None): """Update k means estimate on a single mini-batch X. 
Parameters diff --git a/scikits/learn/cluster/tests/test_dbscan.py b/scikits/learn/cluster/tests/test_dbscan.py index 8b05d98f3cd9b..7c36e710e7cd2 100644 --- a/scikits/learn/cluster/tests/test_dbscan.py +++ b/scikits/learn/cluster/tests/test_dbscan.py @@ -55,9 +55,8 @@ def test_dbscan_feature(): n_clusters_1 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_1, n_clusters) - db = DBSCAN() - labels = db.fit(X, metric=metric, - eps=eps, min_samples=min_samples).labels_ + db = DBSCAN(metric=metric) + labels = db.fit(X, eps=eps, min_samples=min_samples).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_2, n_clusters) @@ -80,9 +79,8 @@ def test_dbscan_callable(): n_clusters_1 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_1, n_clusters) - db = DBSCAN() - labels = db.fit(X, metric=metric, - eps=eps, min_samples=min_samples).labels_ + db = DBSCAN(metric=metric) + labels = db.fit(X, eps=eps, min_samples=min_samples).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_2, n_clusters) diff --git a/scikits/learn/feature_selection/univariate_selection.py b/scikits/learn/feature_selection/univariate_selection.py index 534e15398a85c..8306cc651b8e3 100644 --- a/scikits/learn/feature_selection/univariate_selection.py +++ b/scikits/learn/feature_selection/univariate_selection.py @@ -446,9 +446,9 @@ def _get_support_mask(self): selector = self._selection_modes[self.mode](lambda x: x) selector._pvalues = self._pvalues selector._scores = self._scores - # Now make some acrobaties to set the right named parameter in + # Now perform some acrobatics to set the right named parameter in # the selector possible_params = selector._get_param_names() possible_params.remove('score_func') - selector._set_params(**{possible_params[0]: self.param}) + selector.set_params(**{possible_params[0]: self.param}) return selector._get_support_mask() diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py index 9056996b762e4..9f9f87bbb090e 100644 --- a/scikits/learn/grid_search.py +++ b/scikits/learn/grid_search.py @@ -69,7 +69,7 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, loss_func, print "[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.') # update parameters of the classifier after a copy of its base structure clf = copy.deepcopy(base_clf) - clf._set_params(**clf_params) + clf.set_params(**clf_params) if isinstance(X, list) or isinstance(X, tuple): X_train = [X[i] for i, cond in enumerate(train) if cond] diff --git a/scikits/learn/tests/test_base.py b/scikits/learn/tests/test_base.py index 53a422f5cbc0c..4d1fb4ab572b3 100644 --- a/scikits/learn/tests/test_base.py +++ b/scikits/learn/tests/test_base.py @@ -93,9 +93,9 @@ def test_get_params(): assert_true('a__d' in test._get_params(deep=True)) assert_true('a__d' not in test._get_params(deep=False)) - test._set_params(a__d=2) + test.set_params(a__d=2) assert test.a.d == 2 - assert_raises(AssertionError, test._set_params, a__a=2) + assert_raises(AssertionError, test.set_params, a__a=2) def test_is_classifier(): diff --git a/scikits/learn/tests/test_cross_val.py b/scikits/learn/tests/test_cross_val.py index c849ae1328748..c130d4178825d 100644 --- a/scikits/learn/tests/test_cross_val.py +++ b/scikits/learn/tests/test_cross_val.py @@ -23,8 +23,7 @@ class MockClassifier(BaseEstimator): def __init__(self, a=0): self.a = a - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): return self def predict(self, T): diff --git 
a/scikits/learn/tests/test_grid_search.py b/scikits/learn/tests/test_grid_search.py index e52e06ba2a082..83e18ab46766d 100644 --- a/scikits/learn/tests/test_grid_search.py +++ b/scikits/learn/tests/test_grid_search.py @@ -21,8 +21,7 @@ class MockClassifier(BaseEstimator): def __init__(self, foo_param=0): self.foo_param = foo_param - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): return self def predict(self, T): @@ -88,7 +87,7 @@ def test_grid_search_sparse_score_func(): clf = LinearSVC() cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, score_func=f1_score) # XXX: set refit to False due to a random bug when True (default) - cv.fit(X_[:180], y_[:180], refit=False) + cv.set_params(refit=False).fit(X_[:180], y_[:180]) y_pred = cv.predict(X_[180:]) C = cv.best_estimator.C @@ -96,7 +95,7 @@ def test_grid_search_sparse_score_func(): clf = SparseLinearSVC() cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, score_func=f1_score) # XXX: set refit to False due to a random bug when True (default) - cv.fit(X_[:180], y_[:180], refit=False) + cv.set_params(refit=False).fit(X_[:180], y_[:180]) y_pred2 = cv.predict(X_[180:]) C2 = cv.best_estimator.C diff --git a/scikits/learn/tests/test_naive_bayes.py b/scikits/learn/tests/test_naive_bayes.py index 81321931d1122..730b7e5866ab2 100644 --- a/scikits/learn/tests/test_naive_bayes.py +++ b/scikits/learn/tests/test_naive_bayes.py @@ -123,7 +123,8 @@ def test_discretenb_uniform_prior(): when fit_prior=False and class_prior=None""" for cls in [BernoulliNB, MultinomialNB]: - clf = cls(fit_prior=False) + clf = cls() + clf.set_params(fit_prior=False) clf.fit([[0], [0], [1]], [0, 0, 1]) prior = np.exp(clf.class_log_prior_) assert prior[0] == prior[1] diff --git a/scikits/learn/tests/test_pipeline.py b/scikits/learn/tests/test_pipeline.py index 98911a44c0a6b..42e1f7f78fa1e 100644 --- a/scikits/learn/tests/test_pipeline.py +++ b/scikits/learn/tests/test_pipeline.py @@ -44,7 +44,7 @@ def test_pipeline_init(): dict(svc__a=None, svc__b=None, svc=clf)) # Check that params are set - pipe._set_params(svc__a=0.1) + pipe.set_params(svc__a=0.1) assert_equal(clf.a, 0.1) # Smoke test the repr: repr(pipe) @@ -55,13 +55,13 @@ def test_pipeline_init(): pipe = Pipeline([('anova', filter1), ('svc', clf)]) # Check that params are set - pipe._set_params(svc__C=0.1) + pipe.set_params(svc__C=0.1) assert_equal(clf.C, 0.1) # Smoke test the repr: repr(pipe) # Check that params are not set when naming them wrong - assert_raises(AssertionError, pipe._set_params, anova__C=0.1) + assert_raises(AssertionError, pipe.set_params, anova__C=0.1) # Test clone pipe2 = clone(pipe) From 34aee3387b13a212c2e98fca565cc9826830566f Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Wed, 10 Aug 2011 16:00:34 +0200 Subject: [PATCH 35/51] set_params: update according to @GaelVaroquaux's review --- doc/developers/index.rst | 2 +- scikits/learn/cluster/k_means_.py | 4 ++-- scikits/learn/grid_search.py | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/developers/index.rst b/doc/developers/index.rst index 9dd0d9b7bcc77..c2fb3caceb7ee 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -331,7 +331,7 @@ classifier or a regressor. All estimators implement the fit method:: All built-in estimators also have a ``set_params`` method, which sets data-independent parameters (overriding previous parameter values passed to ``__init__``). This method is not required for an object to be an -estimator, but is used by the :class:`grid_search.GridSearchCV` class. 
+estimator. Instantiation diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index cc54f0e9222eb..dcf466afe02ad 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -488,7 +488,7 @@ def _check_data(self, X): raise ValueError("K-Means does not support sparse input matrices.") X = np.asanyarray(X) if X.shape[0] < self.k: - raise ValueError("n_samples=%d should be larger than k=%d" % ( + raise ValueError("n_samples=%d should be >= k=%d" % ( X.shape[0], self.k)) return X @@ -497,7 +497,7 @@ def fit(self, X, k=None, **params): self.random_state = check_random_state(self.random_state) - X = self._check_data(X, **params) + X = self._check_data(X) if k != None: self.k = k self._set_params(**params) diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py index 9f9f87bbb090e..9c45af853b103 100644 --- a/scikits/learn/grid_search.py +++ b/scikits/learn/grid_search.py @@ -67,7 +67,9 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, loss_func, msg = '%s' % (', '.join('%s=%s' % (k, v) for k, v in clf_params.iteritems())) print "[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.') + # update parameters of the classifier after a copy of its base structure + # FIXME we should be doing a clone here clf = copy.deepcopy(base_clf) clf.set_params(**clf_params) From 392cbe41f2954d409562886fca6c6ced7fdd685c Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Fri, 12 Aug 2011 14:30:37 +0200 Subject: [PATCH 36/51] Rm k param from KMeans.fit again Two tests fail mysteriously... --- scikits/learn/cluster/k_means_.py | 4 +- scikits/learn/cluster/tests/test_k_means.py | 44 ++++++++++----------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index dcf466afe02ad..a4e52331bb720 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -492,14 +492,12 @@ def _check_data(self, X): X.shape[0], self.k)) return X - def fit(self, X, k=None, **params): + def fit(self, X, **params): """Compute k-means""" self.random_state = check_random_state(self.random_state) X = self._check_data(X) - if k != None: - self.k = k self._set_params(**params) self.cluster_centers_, self.labels_, self.inertia_ = k_means( diff --git a/scikits/learn/cluster/tests/test_k_means.py b/scikits/learn/cluster/tests/test_k_means.py index 8144a0536f5fd..66cbab694a826 100644 --- a/scikits/learn/cluster/tests/test_k_means.py +++ b/scikits/learn/cluster/tests/test_k_means.py @@ -18,7 +18,7 @@ def test_k_means_pp_init(): np.random.seed(1) - k_means = KMeans(init="k-means++").fit(X, k=n_clusters) + k_means = KMeans(init="k-means++", k=n_clusters).fit(X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -30,7 +30,7 @@ def test_k_means_pp_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_mini_batch_k_means_pp_init(): @@ -61,7 +61,7 @@ def test_sparse_mini_batch_k_means_pp_init(): def test_k_means_pp_random_init(): np.random.seed(1) - k_means = KMeans(init="random").fit(X, k=n_clusters) + k_means = KMeans(init="random", k=n_clusters).fit(X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -73,13 +73,13 @@ def test_k_means_pp_random_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too 
small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_k_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = KMeans(init=init_array, n_init=1).fit(X, k=n_clusters) + k_means = KMeans(init=init_array, n_init=1, k=n_clusters).fit(X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -91,20 +91,20 @@ def test_k_means_fixed_array_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_k_means_invalid_init(): np.random.seed(1) - k_means = KMeans(init="invalid", n_init=1) - assert_raises(ValueError, k_means.fit, X, k=n_clusters) + k_means = KMeans(init="invalid", n_init=1, k=n_clusters) + assert_raises(ValueError, k_means.fit, X) def test_k_means_copyx(): """Check if copy_x=False returns nearly equal X after de-centering.""" np.random.seed(1) my_X = X.copy() - k_means = KMeans(copy_x=False).fit(my_X, k=n_clusters) + k_means = KMeans(copy_x=False, k=n_clusters).fit(my_X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -123,7 +123,7 @@ def test_k_means_singleton(): np.random.seed(1) my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 0.9]]) array_init = np.array([[1.0, 1.0], [5.0, 5.0]]) - k_means = KMeans(init=array_init).fit(my_X, k=2) + k_means = KMeans(init=array_init, k=2).fit(my_X) # must be singleton clustering assert_equal(np.unique(k_means.labels_).size, 1) @@ -132,7 +132,7 @@ def test_k_means_singleton(): def test_mbk_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=init_array).fit(X) + mbk_means = MiniBatchKMeans(init=init_array, k=n_clusters).fit(X) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -140,7 +140,7 @@ def test_mbk_means_fixed_array_init(): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def test_sparse_mbk_means_fixed_array_init(): @@ -154,13 +154,13 @@ def test_sparse_mbk_means_fixed_array_init(): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def test_sparse_mbk_means_pp_init(): np.random.seed(1) - mbk_means = MiniBatchKMeans(init="k-means++") - assert_raises(ValueError, mbk_means.fit, S, k=n_clusters) + mbk_means = MiniBatchKMeans(init="k-means++", k=n_clusters) + assert_raises(ValueError, mbk_means.fit, S) def test_sparse_mbk_means_callable_init(): @@ -176,17 +176,17 @@ def test_init(Xbar, k, random_state): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def test_k_means_fixed_array_init_fit(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = KMeans(init=init_array, n_init=1).fit(X, k=n_clusters) + k_means = KMeans(init=init_array, n_init=1, k=n_clusters).fit(X) another_init_array = np.vstack([X[1], X[30], X[50]]) - other_k_means = KMeans(init=init_array, n_init=1) - other_k_means.fit(X, init=another_init_array, k=n_clusters) + other_k_means = 
KMeans(init=init_array, n_init=1, k=n_clusters) + other_k_means.fit(X, init=another_init_array) assert_true(not np.allclose(k_means.init, other_k_means.init), "init attributes must be different") @@ -194,11 +194,11 @@ def test_k_means_fixed_array_init_fit(): def test_mbkm_fixed_array_init_fit(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = MiniBatchKMeans(init=init_array).fit(X, k=n_clusters) + k_means = MiniBatchKMeans(init=init_array, k=n_clusters).fit(X) another_init_array = np.vstack([X[1], X[30], X[50]]) - other_k_means = MiniBatchKMeans(init=init_array) - other_k_means.fit(X, init=another_init_array, k=n_clusters) + other_k_means = MiniBatchKMeans(init=another_init_array, k=n_clusters) + other_k_means.fit(X) assert_true(not np.allclose(k_means.init, other_k_means.init), "init attributes must be different") From ce9814bbe88edd9d5ef1b1d003d5e05656490a34 Mon Sep 17 00:00:00 2001 From: Fabian Pedregosa Date: Tue, 23 Aug 2011 15:21:05 +0200 Subject: [PATCH 37/51] Remove fit params from all objects. --- .../learn/cluster/affinity_propagation_.py | 3 +- scikits/learn/cluster/hierarchical.py | 4 +-- scikits/learn/cluster/k_means_.py | 3 +- scikits/learn/cluster/mean_shift_.py | 3 +- scikits/learn/cluster/spectral.py | 3 +- scikits/learn/cluster/tests/test_k_means.py | 3 +- scikits/learn/cluster/tests/test_spectral.py | 2 +- .../learn/covariance/empirical_covariance_.py | 3 +- .../learn/covariance/shrunk_covariance_.py | 3 +- scikits/learn/decomposition/fastica_.py | 3 +- scikits/learn/decomposition/kernel_pca.py | 3 +- scikits/learn/decomposition/nmf.py | 3 +- scikits/learn/decomposition/pca.py | 6 ++-- scikits/learn/decomposition/sparse_pca.py | 11 +++---- .../feature_selection/univariate_selection.py | 9 ++---- scikits/learn/lda.py | 3 +- scikits/learn/linear_model/base.py | 9 ++---- scikits/learn/linear_model/bayes.py | 6 ++-- .../learn/linear_model/coordinate_descent.py | 12 +++---- scikits/learn/linear_model/least_angle.py | 10 ++---- scikits/learn/linear_model/omp.py | 4 +-- scikits/learn/linear_model/ridge.py | 14 +++------ .../linear_model/sparse/coordinate_descent.py | 17 +++++----- .../sparse/tests/test_coordinate_descent.py | 27 +++++++++------- .../tests/test_coordinate_descent.py | 31 +++++++++++-------- .../linear_model/tests/test_least_angle.py | 10 +++--- .../learn/linear_model/tests/test_ridge.py | 15 ++++----- scikits/learn/manifold/isomap.py | 6 ++-- scikits/learn/manifold/locally_linear.py | 9 ++---- .../manifold/tests/test_locally_linear.py | 8 +++-- scikits/learn/neighbors.py | 20 +++++------- scikits/learn/pls.py | 18 +++++------ scikits/learn/qda.py | 3 +- scikits/learn/svm/base.py | 7 ++--- scikits/learn/svm/sparse/base.py | 7 ++--- scikits/learn/tests/test_neighbors.py | 15 ++++----- scikits/learn/tests/test_pls.py | 20 ++++++------ 37 files changed, 148 insertions(+), 185 deletions(-) diff --git a/scikits/learn/cluster/affinity_propagation_.py b/scikits/learn/cluster/affinity_propagation_.py index 6db00c3cc07b1..a96d70d25eee9 100644 --- a/scikits/learn/cluster/affinity_propagation_.py +++ b/scikits/learn/cluster/affinity_propagation_.py @@ -210,7 +210,7 @@ def __init__(self, damping=.5, max_iter=200, convit=30, copy=True): self.convit = convit self.copy = copy - def fit(self, S, p=None, **params): + def fit(self, S, p=None): """compute MeanShift Parameters @@ -227,7 +227,6 @@ def fit(self, S, p=None, **params): algorithm, for memory efficiency """ - self._set_params(**params) self.cluster_centers_indices_, self.labels_ = 
affinity_propagation(S, p, max_iter=self.max_iter, convit=self.convit, damping=self.damping, diff --git a/scikits/learn/cluster/hierarchical.py b/scikits/learn/cluster/hierarchical.py index 5451fd49aff1f..ba98de6d262e8 100644 --- a/scikits/learn/cluster/hierarchical.py +++ b/scikits/learn/cluster/hierarchical.py @@ -292,7 +292,7 @@ def __init__(self, n_clusters=2, memory=Memory(cachedir=None, verbose=0), self.n_components = n_components self.connectivity = connectivity - def fit(self, X, **params): + def fit(self, X): """Fit the hierarchical clustering on the data Parameters @@ -304,8 +304,6 @@ def fit(self, X, **params): ------- self """ - self._set_params(**params) - memory = self.memory if isinstance(memory, basestring): memory = Memory(cachedir=memory) diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index a4e52331bb720..c1f3b7b908012 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -492,13 +492,12 @@ def _check_data(self, X): X.shape[0], self.k)) return X - def fit(self, X, **params): + def fit(self, X): """Compute k-means""" self.random_state = check_random_state(self.random_state) X = self._check_data(X) - self._set_params(**params) self.cluster_centers_, self.labels_, self.inertia_ = k_means( X, k=self.k, init=self.init, n_init=self.n_init, diff --git a/scikits/learn/cluster/mean_shift_.py b/scikits/learn/cluster/mean_shift_.py index 9f554487a795c..7e2edbd177d22 100644 --- a/scikits/learn/cluster/mean_shift_.py +++ b/scikits/learn/cluster/mean_shift_.py @@ -191,7 +191,7 @@ class MeanShift(BaseEstimator): def __init__(self, bandwidth=None): self.bandwidth = bandwidth - def fit(self, X, **params): + def fit(self, X): """ Compute MeanShift Parameters @@ -200,6 +200,5 @@ def fit(self, X, **params): Input points """ - self._set_params(**params) self.cluster_centers_, self.labels_ = mean_shift(X, self.bandwidth) return self diff --git a/scikits/learn/cluster/spectral.py b/scikits/learn/cluster/spectral.py index 82a4a01ca992b..20b75e8da6753 100644 --- a/scikits/learn/cluster/spectral.py +++ b/scikits/learn/cluster/spectral.py @@ -252,7 +252,7 @@ def __init__(self, k=8, mode=None, random_state=None): self.mode = mode self.random_state = random_state - def fit(self, X, **params): + def fit(self, X): """Compute the spectral clustering from the affinity matrix Parameters @@ -281,7 +281,6 @@ def fit(self, X, **params): speeds up computation. 
""" self.random_state = check_random_state(self.random_state) - self._set_params(**params) self.labels_ = spectral_clustering(X, k=self.k, mode=self.mode, random_state=self.random_state) return self diff --git a/scikits/learn/cluster/tests/test_k_means.py b/scikits/learn/cluster/tests/test_k_means.py index 66cbab694a826..4b37c6c3620cc 100644 --- a/scikits/learn/cluster/tests/test_k_means.py +++ b/scikits/learn/cluster/tests/test_k_means.py @@ -186,7 +186,8 @@ def test_k_means_fixed_array_init_fit(): another_init_array = np.vstack([X[1], X[30], X[50]]) other_k_means = KMeans(init=init_array, n_init=1, k=n_clusters) - other_k_means.fit(X, init=another_init_array) + other_k_means.set_params(init=another_init_array) + other_k_means.fit(X) assert_true(not np.allclose(k_means.init, other_k_means.init), "init attributes must be different") diff --git a/scikits/learn/cluster/tests/test_spectral.py b/scikits/learn/cluster/tests/test_spectral.py index 3846f53ad293b..a2663160c760d 100644 --- a/scikits/learn/cluster/tests/test_spectral.py +++ b/scikits/learn/cluster/tests/test_spectral.py @@ -21,7 +21,7 @@ def test_spectral_clustering(): ]) for mat in (S, sparse.csr_matrix(S)): - model = SpectralClustering(random_state=0).fit(mat, k=2) + model = SpectralClustering(random_state=0, k=2).fit(mat) labels = model.labels_ if labels[0] == 0: labels = 1 - labels diff --git a/scikits/learn/covariance/empirical_covariance_.py b/scikits/learn/covariance/empirical_covariance_.py index 80860ef56150d..ff488a9ce85fb 100644 --- a/scikits/learn/covariance/empirical_covariance_.py +++ b/scikits/learn/covariance/empirical_covariance_.py @@ -108,7 +108,7 @@ def _set_estimates(self, covariance): else: self.precision_ = None - def fit(self, X, assume_centered=False, **params): + def fit(self, X, assume_centered=False): """ Fits the Maximum Likelihood Estimator covariance model according to the given training data and parameters. @@ -130,7 +130,6 @@ def fit(self, X, assume_centered=False, **params): Returns self. """ - self._set_params(**params) covariance = empirical_covariance(X, assume_centered=assume_centered) self._set_estimates(covariance) diff --git a/scikits/learn/covariance/shrunk_covariance_.py b/scikits/learn/covariance/shrunk_covariance_.py index 60f83ab8e5aa4..bd083663bd5c6 100644 --- a/scikits/learn/covariance/shrunk_covariance_.py +++ b/scikits/learn/covariance/shrunk_covariance_.py @@ -99,7 +99,7 @@ def __init__(self, store_precision=True, shrinkage=0.1): self.store_precision = store_precision self.shrinkage = shrinkage - def fit(self, X, assume_centered=False, **params): + def fit(self, X, assume_centered=False): """ Fits the shrunk covariance model according to the given training data and parameters. @@ -121,7 +121,6 @@ def fit(self, X, assume_centered=False, **params): Returns self. 
""" - self._set_params(**params) empirical_cov = empirical_covariance(X, assume_centered=assume_centered) covariance = shrunk_covariance(empirical_cov, self.shrinkage) diff --git a/scikits/learn/decomposition/fastica_.py b/scikits/learn/decomposition/fastica_.py index 2c13cd33dbc92..cb8746cca14bb 100644 --- a/scikits/learn/decomposition/fastica_.py +++ b/scikits/learn/decomposition/fastica_.py @@ -356,8 +356,7 @@ def __init__(self, n_components=None, algorithm='parallel', whiten=True, self.tol = tol self.w_init = w_init - def fit(self, X, **params): - self._set_params(**params) + def fit(self, X): whitening_, unmixing_, sources_ = fastica(X, self.n_components, self.algorithm, self.whiten, self.fun, self.fun_prime, self.fun_args, self.max_iter, diff --git a/scikits/learn/decomposition/kernel_pca.py b/scikits/learn/decomposition/kernel_pca.py index 3e4de77191bcc..de5f8ba7f7274 100644 --- a/scikits/learn/decomposition/kernel_pca.py +++ b/scikits/learn/decomposition/kernel_pca.py @@ -174,7 +174,7 @@ def _fit_inverse_transform(self, X_transformed, X): self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True) self.X_transformed_fit_ = X_transformed - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -188,7 +188,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. """ - self._set_params(**params) self._fit_transform(X) if self.fit_inverse_transform: diff --git a/scikits/learn/decomposition/nmf.py b/scikits/learn/decomposition/nmf.py index 7ab69774ef69c..3db28b394263b 100644 --- a/scikits/learn/decomposition/nmf.py +++ b/scikits/learn/decomposition/nmf.py @@ -344,7 +344,7 @@ def __init__(self, n_components=None, init="nndsvdar", sparseness=None, self.max_iter = max_iter self.nls_max_iter = nls_max_iter - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Learn a NMF model for the data X and returns the transformed data. This is more efficient than calling fit followed by transform. @@ -360,7 +360,6 @@ def fit_transform(self, X, y=None, **params): data: array, [n_samples, n_components] Transformed data """ - self._set_params(**params) X = np.atleast_2d(X) if (X < 0).any(): raise ValueError("Negative data passed to NMF.fit.") diff --git a/scikits/learn/decomposition/pca.py b/scikits/learn/decomposition/pca.py index 81029e5e9c0ca..bd112fcde07d1 100644 --- a/scikits/learn/decomposition/pca.py +++ b/scikits/learn/decomposition/pca.py @@ -215,8 +215,7 @@ def fit_transform(self, X, y=None, **params): return U - def _fit(self, X, **params): - self._set_params(**params) + def _fit(self, X): X = np.atleast_2d(X) n_samples, n_features = X.shape if self.copy: @@ -426,7 +425,7 @@ def __init__(self, n_components, copy=True, iterated_power=3, self.whiten = whiten self.mean_ = None - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model to the data X. Parameters @@ -440,7 +439,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. 
""" - self._set_params(**params) if not hasattr(X, 'todense'): X = np.atleast_2d(X) diff --git a/scikits/learn/decomposition/sparse_pca.py b/scikits/learn/decomposition/sparse_pca.py index bad147cff0ad6..f6b02dc1d4ac7 100644 --- a/scikits/learn/decomposition/sparse_pca.py +++ b/scikits/learn/decomposition/sparse_pca.py @@ -77,13 +77,14 @@ def _update_code(dictionary, Y, alpha, code=None, Gram=None, method='lars', finally: np.seterr(**err_mgt) elif method == 'cd': - clf = Lasso(alpha=alpha, fit_intercept=False, precompute=Gram) + clf = Lasso(alpha=alpha, fit_intercept=False, precompute=Gram, + max_iter=1000, tol=tol) for k in range(n_features): # A huge amount of time is spent in this loop. It needs to be # tight. if code is not None: clf.coef_ = code[:, k] # Init with previous value of Vk - clf.fit(dictionary, Y[:, k], max_iter=1000, tol=tol) + clf.fit(dictionary, Y[:, k]) new_code[:, k] = clf.coef_ else: raise NotImplemented("Lasso method %s is not implemented." % method) @@ -601,7 +602,7 @@ def __init__(self, n_components, alpha=1, ridge_alpha=0.01, max_iter=1000, self.verbose = verbose self.random_state = random_state - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -615,7 +616,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. """ - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = np.asanyarray(X) code_init = self.V_init.T if self.V_init is not None else None @@ -726,7 +726,7 @@ def __init__(self, n_components, alpha=1, ridge_alpha=0.01, n_iter=100, self.method = method self.random_state = random_state - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -740,7 +740,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. """ - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = np.asanyarray(X) Vt, _ = dict_learning_online(X.T, self.n_components, alpha=self.alpha, diff --git a/scikits/learn/feature_selection/univariate_selection.py b/scikits/learn/feature_selection/univariate_selection.py index 8306cc651b8e3..3350795ea2f90 100644 --- a/scikits/learn/feature_selection/univariate_selection.py +++ b/scikits/learn/feature_selection/univariate_selection.py @@ -228,11 +228,10 @@ def __init__(self, score_func): "was passed." % (score_func, type(score_func))) self.score_func = score_func - def fit(self, X, y, **params): + def fit(self, X, y): """ Evaluate the function """ - self._set_params(**params) _scores = self.score_func(X, y) self._scores = _scores[0] self._pvalues = _scores[1] @@ -245,18 +244,16 @@ def get_support(self, indices=False): mask = self._get_support_mask() return mask if not indices else np.where(mask)[0] - def transform(self, X, **params): + def transform(self, X): """ Transform a new matrix using the selected features """ - self._set_params(**params) return safe_asanyarray(X)[:, self.get_support(indices=issparse(X))] - def inverse_transform(self, X, **params): + def inverse_transform(self, X): """ Transform a new matrix using the selected features """ - self._set_params(**params) support_ = self.get_support() if X.ndim == 1: X = X[None, :] diff --git a/scikits/learn/lda.py b/scikits/learn/lda.py index 8d208164e62f2..54c5e19fa8cc9 100644 --- a/scikits/learn/lda.py +++ b/scikits/learn/lda.py @@ -66,7 +66,7 @@ def __init__(self, n_components=None, priors=None): print 'warning: the priors do not sum to 1. 
Renormalizing' self.priors = self.priors / self.priors.sum() - def fit(self, X, y, store_covariance=False, tol=1.0e-4, **params): + def fit(self, X, y, store_covariance=False, tol=1.0e-4): """ Fit the LDA model according to the given training data and parameters. @@ -81,7 +81,6 @@ def fit(self, X, y, store_covariance=False, tol=1.0e-4, **params): If True the covariance matrix (shared by all classes) is computed and stored in self.covariance_ attribute. """ - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) if y.dtype.char.lower() not in ('b', 'h', 'i'): diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index d71701b2c122f..3304ba5ddd328 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -97,7 +97,7 @@ class LinearRegression(LinearModel): def __init__(self, fit_intercept=True): self.fit_intercept = fit_intercept - def fit(self, X, y, **params): + def fit(self, X, y): """ Fit linear model. @@ -116,7 +116,6 @@ def fit(self, X, y, **params): ------- self : returns an instance of self. """ - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) @@ -299,7 +298,7 @@ def _set_class_weight(self, class_weight, classes, y): self.class_weight = weight def fit(self, X, y, coef_init=None, intercept_init=None, - class_weight=None, sample_weight=None, **params): + class_weight=None, sample_weight=None): """Fit linear model with Stochastic Gradient Descent. Parameters @@ -330,7 +329,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, ------- self : returns an instance of self. """ - self._set_params(**params) # check only y because X might be dense or sparse y = np.asanyarray(y, dtype=np.float64, order='C') @@ -443,7 +441,7 @@ def _set_loss_function(self, loss): raise ValueError("The loss %s is not supported. " % loss) def fit(self, X, y, coef_init=None, intercept_init=None, - sample_weight=None, **params): + sample_weight=None): """Fit linear model with Stochastic Gradient Descent. Parameters @@ -467,7 +465,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, ------- self : returns an instance of self. """ - self._set_params(**params) y = np.asanyarray(y, dtype=np.float64, order="C") # make sure X has shape diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 0fd1424ff979c..1e0206399690b 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -115,7 +115,7 @@ def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.fit_intercept = fit_intercept self.verbose = verbose - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model Parameters @@ -129,7 +129,6 @@ def fit(self, X, y, **params): ------- self : returns an instance of self. """ - self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) @@ -325,7 +324,7 @@ def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.threshold_lambda = threshold_lambda self.verbose = verbose - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the ARDRegression model according to the given training data and parameters. @@ -343,7 +342,6 @@ def fit(self, X, y, **params): ------- self : returns an instance of self. 
""" - self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index 55af2b422acd1..0a09d1c1c0f00 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -83,7 +83,7 @@ def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, self.max_iter = max_iter self.tol = tol - def fit(self, X, y, Xy=None, coef_init=None, **params): + def fit(self, X, y, Xy=None, coef_init=None): """Fit Elastic Net model with coordinate descent Parameters @@ -108,7 +108,6 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): To avoid memory re-allocation it is advised to allocate the initial data in memory directly using that format. """ - self._set_params(**params) X = np.asanyarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) @@ -326,17 +325,19 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, alphas = np.sort(alphas)[::-1] # make sure alphas are properly ordered coef_ = None # init coef_ models = [] + Xy = None if not 'precompute' in fit_params \ or fit_params['precompute'] is True \ or (fit_intercept and hasattr(fit_params['precompute'], '__array__')): fit_params['precompute'] = np.dot(X.T, X) if not 'Xy' in fit_params or fit_params['Xy'] is None: - fit_params['Xy'] = np.dot(X.T, y) + Xy = np.dot(X.T, y) for alpha in alphas: model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False) - model.fit(X, y, coef_init=coef_, **fit_params) + model.set_params(**fit_params) + model.fit(X, y, coef_init=coef_, Xy=Xy) if fit_intercept: model.fit_intercept = True model._set_intercept(Xmean, ymean) @@ -362,7 +363,7 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, self.tol = tol self.cv = cv - def fit(self, X, y, **fit_params): + def fit(self, X, y): """Fit linear model with coordinate descent along decreasing alphas using cross-validation @@ -380,7 +381,6 @@ def fit(self, X, y, **fit_params): keyword arguments passed to the Lasso fit method """ - self._set_params(**fit_params) X = np.asfortranarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 098203a877ba7..da2b7232c243a 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -390,7 +390,7 @@ def _get_gram(self): Gram = None return Gram - def fit(self, X, y, overwrite_X=False, **params): + def fit(self, X, y, overwrite_X=False): """Fit the model using X, y as training data. parameters @@ -406,7 +406,6 @@ def fit(self, X, y, overwrite_X=False, **params): self : object returns an instance of self. """ - self._set_params(**params) X = np.atleast_2d(X) y = np.atleast_1d(y) @@ -684,7 +683,7 @@ def __init__(self, fit_intercept=True, verbose=False, max_iter=500, self.n_jobs = n_jobs self.eps = eps - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model using X, y as training data. Parameters @@ -700,7 +699,6 @@ def fit(self, X, y, **params): self : object returns an instance of self. 
""" - self._set_params(**params) X = np.asanyarray(X) n_samples, n_features = X.shape @@ -917,7 +915,7 @@ def __init__(self, criterion='aic', fit_intercept=True, verbose=False, self.precompute = precompute self.eps = eps - def fit(self, X, y, overwrite_X=False, **params): + def fit(self, X, y, overwrite_X=False): """Fit the model using X, y as training data. parameters @@ -933,8 +931,6 @@ def fit(self, X, y, overwrite_X=False, **params): self : object returns an instance of self. """ - self._set_params(**params) - X = np.atleast_2d(X) y = np.atleast_1d(y) diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index a9afba929ed3e..d07344672a410 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -455,7 +455,7 @@ def __init__(self, n_nonzero_coefs=None, eps=None, fit_intercept=True, self.precompute_gram = precompute_gram def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, - overwrite_gram=False, overwrite_xy=False, **params): + overwrite_gram=False, overwrite_xy=False): """Fit the model using X, y as training data. Parameters @@ -492,8 +492,6 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, self: object returns an instance of self. """ - self._set_params(**params) - X = np.atleast_2d(X) y = np.atleast_1d(y) diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py index d295a412b9c5a..1889c089d0f05 100644 --- a/scikits/learn/linear_model/ridge.py +++ b/scikits/learn/linear_model/ridge.py @@ -164,7 +164,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, tol=1e-3): self.fit_intercept = fit_intercept self.tol = tol - def fit(self, X, y, sample_weight=1.0, solver='auto', **params): + def fit(self, X, y, sample_weight=1.0, solver='auto'): """ Fit Ridge regression model @@ -191,8 +191,6 @@ def fit(self, X, y, sample_weight=1.0, solver='auto', **params): ------- self : returns an instance of self. """ - self._set_params(**params) - X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) @@ -463,7 +461,7 @@ def __init__(self, alphas=np.array([0.1, 1.0, 10.0]), fit_intercept=True, self.loss_func = loss_func self.cv = cv - def fit(self, X, y, sample_weight=1.0, **params): + def fit(self, X, y, sample_weight=1.0): """Fit Ridge regression model Parameters @@ -485,8 +483,6 @@ def fit(self, X, y, sample_weight=1.0, **params): ------- self : Returns self. """ - self._set_params(**params) - if self.cv is None: estimator = _RidgeGCV(self.alphas, self.fit_intercept, self.score_func, self.loss_func) @@ -512,7 +508,7 @@ def fit(self, X, y, sample_weight=1.0, **params): class RidgeClassifierCV(RidgeCV): - def fit(self, X, y, sample_weight=1.0, class_weight=None, **params): + def fit(self, X, y, sample_weight=1.0, class_weight=None): """ Fit the ridge classifier. @@ -538,15 +534,13 @@ def fit(self, X, y, sample_weight=1.0, class_weight=None, **params): self : object Returns self. 
""" - self._set_params(**params) if class_weight is None: class_weight = {} sample_weight2 = np.array([class_weight.get(k, 1.0) for k in y]) self.label_binarizer = LabelBinarizer() Y = self.label_binarizer.fit_transform(y) RidgeCV.fit(self, X, Y, - sample_weight=sample_weight * sample_weight2, - cv=self.cv) + sample_weight=sample_weight * sample_weight2) return self def decision_function(self, X): diff --git a/scikits/learn/linear_model/sparse/coordinate_descent.py b/scikits/learn/linear_model/sparse/coordinate_descent.py index 38ef4245f6be7..9beeaeace03f8 100644 --- a/scikits/learn/linear_model/sparse/coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/coordinate_descent.py @@ -41,13 +41,16 @@ class ElasticNet(LinearModel): while alpha corresponds to the lambda parameter in glmnet. """ - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False): + def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False, + max_iter=1000, tol=1e-4): if fit_intercept: raise NotImplementedError("fit_intercept=True is not implemented") self.alpha = alpha self.rho = rho self.fit_intercept = fit_intercept self.intercept_ = 0.0 + self.max_iter = max_iter + self.tol = tol self._set_coef(None) def _set_coef(self, coef_): @@ -58,13 +61,12 @@ def _set_coef(self, coef_): # sparse representation of the fitted coef for the predict method self.sparse_coef_ = sparse.csr_matrix(coef_) - def fit(self, X, y, max_iter=1000, tol=1e-4, **params): + def fit(self, X, y): """Fit current model with coordinate descent X is expected to be a sparse matrix. For maximum efficiency, use a sparse matrix in CSC format (scipy.sparse.csc_matrix) """ - self._set_params(**params) X = sparse.csc_matrix(X) y = np.asanyarray(y, dtype=np.float64) @@ -83,7 +85,7 @@ def fit(self, X, y, max_iter=1000, tol=1e-4, **params): coef_, self.dual_gap_, self.eps_ = \ cd_fast_sparse.enet_coordinate_descent( self.coef_, alpha, beta, X_data, X.indices, X.indptr, y, - max_iter, tol) + self.max_iter, self.tol) # update self.coef_ and self.sparse_coef_ consistently self._set_coef(coef_) @@ -133,6 +135,7 @@ class Lasso(ElasticNet): """ - def __init__(self, alpha=1.0, fit_intercept=False): - super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept) + def __init__(self, alpha=1.0, fit_intercept=False, max_iter=1000, tol=1e-4): + super(Lasso, self).__init__( + alpha=alpha, rho=1.0, fit_intercept=fit_intercept, max_iter=max_iter, + tol=tol) diff --git a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py index 3abecce590af6..fa53a0ca8183a 100644 --- a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py @@ -50,8 +50,8 @@ def test_enet_toy_list_input(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = SparseENet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000) + clf = SparseENet(alpha=0.5, rho=0.3, max_iter=1000) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -89,8 +89,8 @@ def test_enet_toy_explicit_sparse_input(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = SparseENet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000) + clf = SparseENet(alpha=0.5, rho=0.3, max_iter=1000) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], 
decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -132,14 +132,16 @@ def test_sparse_enet_not_as_toy_dataset(): X_train, X_test = X[n_samples / 2:], X[:n_samples / 2] y_train, y_test = y[n_samples / 2:], y[:n_samples / 2] - s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False) - s_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version - d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False) - d_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + d_clf.fit(X_train, y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) assert d_clf.score(X_test, y_test) > 0.85 @@ -158,14 +160,15 @@ def test_sparse_lasso_not_as_toy_dataset(): X_train, X_test = X[n_samples / 2:], X[:n_samples / 2] y_train, y_test = y[n_samples / 2:], y[:n_samples / 2] - s_clf = SparseLasso(alpha=0.1, fit_intercept=False) - s_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + s_clf = SparseLasso(alpha=0.1, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version - d_clf = DenseLasso(alpha=0.1, fit_intercept=False) - d_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + d_clf = DenseLasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) + d_clf.fit(X_train, y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) assert d_clf.score(X_test, y_test) > 0.85 diff --git a/scikits/learn/linear_model/tests/test_coordinate_descent.py b/scikits/learn/linear_model/tests/test_coordinate_descent.py index 34c92fc4e7d78..01ab83a6c48dc 100644 --- a/scikits/learn/linear_model/tests/test_coordinate_descent.py +++ b/scikits/learn/linear_model/tests/test_coordinate_descent.py @@ -83,20 +83,23 @@ def test_enet_toy(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = ElasticNet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000, precompute=False) + clf = ElasticNet(alpha=0.5, rho=0.3, max_iter=1000, + precompute=False) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) - clf.fit(X, Y, max_iter=1000, precompute=True) # with Gram + clf.set_params(max_iter=1000, precompute=True) + clf.fit(X, Y) # with Gram pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) - clf.fit(X, Y, max_iter=1000, precompute=np.dot(X.T, X)) # with Gram + clf.set_params(max_iter=1000, precompute=np.dot(X.T, X)) + clf.fit(X, Y) # with Gram pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -121,11 +124,11 @@ def test_lasso_path(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = LassoCV(n_alphas=100, eps=1e-3).fit(X, y, max_iter=max_iter) + clf = LassoCV(n_alphas=100, eps=1e-3, max_iter=max_iter).fit(X, y) assert_almost_equal(clf.alpha, 0.011, 2) - clf = 
LassoCV(n_alphas=100, eps=1e-3) - clf.fit(X, y, max_iter=max_iter, precompute=True) + clf = LassoCV(n_alphas=100, eps=1e-3, max_iter=max_iter, precompute=True) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.011, 2) # test set @@ -145,12 +148,13 @@ def test_enet_path(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5) - clf.fit(X, y, max_iter=max_iter) + clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5, max_iter=max_iter) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.00779, 2) - clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5) - clf.fit(X, y, max_iter=max_iter, precompute=True) + clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5, + max_iter=max_iter, precompute=True) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.00779, 2) # test set @@ -170,8 +174,9 @@ def test_path_parameters(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95) - clf.fit(X, y, max_iter=max_iter, rho=0.5, n_alphas=50) # new params + clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter, + rho=0.5) + clf.fit(X, y) # new params assert_almost_equal(0.5, clf.rho) assert_equal(50, clf.n_alphas) assert_equal(50, len(clf.alphas)) diff --git a/scikits/learn/linear_model/tests/test_least_angle.py b/scikits/learn/linear_model/tests/test_least_angle.py index 7e84ce37ea56f..8f048075a813d 100644 --- a/scikits/learn/linear_model/tests/test_least_angle.py +++ b/scikits/learn/linear_model/tests/test_least_angle.py @@ -104,17 +104,17 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): X = 3 * diabetes.data alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') - lasso_cd = linear_model.Lasso(fit_intercept=False) + lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8) for (c, a) in zip(lasso_path.T, alphas): lasso_cd.alpha = a - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(c - lasso_cd.coef_) assert error < 0.01 # similar test, with the classifiers for alpha in np.linspace(1e-2, 1 - 1e-2): clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y) - clf2 = linear_model.Lasso(alpha=alpha).fit(X, y, tol=1e-8) + clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y) err = np.linalg.norm(clf1.coef_ - clf2.coef_) assert err < 1e-3 @@ -129,9 +129,9 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): for alphas_min in alphas_min: alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', alpha_min=0.9) - lasso_cd = linear_model.Lasso(fit_intercept=False) + lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8) lasso_cd.alpha = alphas[-1] - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) assert error < 0.01 diff --git a/scikits/learn/linear_model/tests/test_ridge.py b/scikits/learn/linear_model/tests/test_ridge.py index 1c229308cc04d..d5ca7fb31b849 100644 --- a/scikits/learn/linear_model/tests/test_ridge.py +++ b/scikits/learn/linear_model/tests/test_ridge.py @@ -101,15 +101,15 @@ def test_ridge_vs_lstsq(): y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) - ridge = Ridge(alpha=0.) 
- ols = LinearRegression() + ridge = Ridge(alpha=0., fit_intercept=False) + ols = LinearRegression(fit_intercept=False) ridge.fit(X, y) ols.fit (X, y) assert_almost_equal(ridge.coef_, ols.coef_) - ridge.fit(X, y, fit_intercept=False) - ols.fit (X, y, fit_intercept=False) + ridge.fit(X, y) + ols.fit (X, y) assert_almost_equal(ridge.coef_, ols.coef_) def _test_ridge_loo(filter_): @@ -182,7 +182,8 @@ def _test_ridge_cv(filter_): assert_equal(type(ridge_cv.intercept_), np.float64) cv = KFold(n_samples, 5) - ridge_cv.fit(filter_(X_diabetes), y_diabetes, cv=cv) + ridge_cv.set_params(cv=cv) + ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) assert_equal(len(ridge_cv.coef_.shape), 1) @@ -216,10 +217,10 @@ def _test_ridge_classifiers(filter_): y_pred = clf.predict(filter_(X_iris)) assert np.mean(y_iris == y_pred) >= 0.8 - clf = RidgeClassifierCV() n_samples = X_iris.shape[0] cv = KFold(n_samples, 5) - clf.fit(filter_(X_iris), y_iris, cv=cv) + clf = RidgeClassifierCV(cv=cv) + clf.fit(filter_(X_iris), y_iris) y_pred = clf.predict(filter_(X_iris)) assert np.mean(y_iris == y_pred) >= 0.8 diff --git a/scikits/learn/manifold/isomap.py b/scikits/learn/manifold/isomap.py index df7b298b4eb5b..c78fed1707512 100644 --- a/scikits/learn/manifold/isomap.py +++ b/scikits/learn/manifold/isomap.py @@ -122,7 +122,7 @@ def reconstruction_error(self): evals = self.kernel_pca_.lambdas_ return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0] - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Compute the embedding vectors for data X Parameters @@ -134,11 +134,10 @@ def fit(self, X, y=None, **params): ------- self : returns an instance of self. """ - self._set_params(**params) self._fit_transform(X) return self - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Fit the model from data in X and transform X. Parameters @@ -151,7 +150,6 @@ def fit_transform(self, X, y=None, **params): ------- X_new: array-like, shape (n_samples, out_dim) """ - self._set_params(**params) self._fit_transform(X) return self.embedding_ diff --git a/scikits/learn/manifold/locally_linear.py b/scikits/learn/manifold/locally_linear.py index c12e7ed49c190..8daa86a1786a2 100644 --- a/scikits/learn/manifold/locally_linear.py +++ b/scikits/learn/manifold/locally_linear.py @@ -458,7 +458,7 @@ def _fit_transform(self, X): max_iter=self.max_iter, method=self.method, hessian_tol=self.hessian_tol, modified_tol=self.modified_tol) - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Compute the embedding vectors for data X Parameters @@ -470,11 +470,10 @@ def fit(self, X, y=None, **params): ------- self : returns an instance of self. """ - self._set_params(**params) self._fit_transform(X) return self - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Compute the embedding vectors for data X and transform X. Parameters @@ -486,11 +485,10 @@ def fit_transform(self, X, y=None, **params): ------- X_new: array-like, shape (n_samples, out_dim) """ - self._set_params(**params) self._fit_transform(X) return self.embedding_ - def transform(self, X, **params): + def transform(self, X): """ Transform new points into embedding space. 
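The pattern running through this patch is that estimators no longer take hyper-parameters in ``fit`` (the ``self._set_params(**params)`` calls and ``**params`` signatures are removed); parameters are passed to the constructor instead, or changed later with ``set_params`` before refitting. A minimal sketch of the calling convention the updated tests rely on, using the dense ``Lasso`` as an example (the toy data below is made up purely for illustration)::

    import numpy as np
    from scikits.learn.linear_model import Lasso

    X = np.array([[-1.], [0.], [1.]])
    y = np.array([-1., 0., 1.])

    # hyper-parameters now go to the constructor, not to fit
    clf = Lasso(alpha=0.1, max_iter=1000, tol=1e-8)
    clf.fit(X, y)

    # ... or are updated afterwards with set_params, followed by a new fit
    clf.set_params(alpha=0.5)
    clf.fit(X, y)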
@@ -507,7 +505,6 @@ def transform(self, X, **params): Because of scaling performed by this method, it is discouraged to use it together with methods that are not scale-invariant (like SVMs) """ - self._set_params(**params) X = np.atleast_2d(X) if not hasattr(self, 'ball_tree_'): raise ValueError('The model is not fitted') diff --git a/scikits/learn/manifold/tests/test_locally_linear.py b/scikits/learn/manifold/tests/test_locally_linear.py index 25bff361dad3e..01007484d72e8 100644 --- a/scikits/learn/manifold/tests/test_locally_linear.py +++ b/scikits/learn/manifold/tests/test_locally_linear.py @@ -31,7 +31,8 @@ def test_lle_simple_grid(): assert_lower(reconstruction_error, tol) for solver in eigen_solvers: - clf.fit(X, eigen_solver=solver) + clf.set_params(eigen_solver=solver) + clf.fit(X) assert clf.embedding_.shape[1] == out_dim reconstruction_error = np.linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 @@ -53,7 +54,7 @@ def test_lle_manifold(): X = np.c_[X, X[:, 0] ** 2 / 20] out_dim = 2 clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=out_dim) - tol = .5 + tol = 1.5 N = neighbors.kneighbors_graph(X, clf.n_neighbors, mode='barycenter').toarray() @@ -61,7 +62,8 @@ def test_lle_manifold(): assert_lower(reconstruction_error, tol) for solver in eigen_solvers: - clf.fit(X, eigen_solver=solver) + clf.set_params(eigen_solver=solver) + clf.fit(X) assert clf.embedding_.shape[1] == out_dim reconstruction_error = np.linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py index 04361fbb98e01..616acacf18e8a 100644 --- a/scikits/learn/neighbors.py +++ b/scikits/learn/neighbors.py @@ -111,7 +111,7 @@ def __init__(self, n_neighbors=5, algorithm='auto', leaf_size=20): self.leaf_size = leaf_size self.algorithm = algorithm - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model using X, y as training data Parameters @@ -122,14 +122,11 @@ def fit(self, X, y, **params): y : {array-like, sparse matrix}, shape = [n_samples] Target values, array of integer values. - params : list of keyword, optional - Overwrite keywords from __init__ """ X = safe_asanyarray(X) if y is None: raise ValueError("y must not be None") self._y = np.asanyarray(y) - self._set_params(**params) if issparse(X): self.ball_tree = None @@ -142,7 +139,7 @@ def fit(self, X, y, **params): self._fit_X = X return self - def kneighbors(self, X, return_distance=True, **params): + def kneighbors(self, X, n_neighbors=None, return_distance=True): """Finds the K-neighbors of a point. Returns distance @@ -193,21 +190,22 @@ class from an array representing our data set and ask who's [2]]...) """ - self._set_params(**params) X = atleast2d_or_csr(X) + if n_neighbors is None: + n_neighbors = self.n_neighbors if self.ball_tree is None: dist = euclidean_distances(X, self._fit_X, squared=True) # XXX: should be implemented with a partial sort - neigh_ind = dist.argsort(axis=1)[:, :self.n_neighbors] + neigh_ind = dist.argsort(axis=1)[:, :n_neighbors] if not return_distance: return neigh_ind else: return dist.T[neigh_ind], neigh_ind else: - return self.ball_tree.query(X, self.n_neighbors, + return self.ball_tree.query(X, n_neighbors, return_distance=return_distance) - def predict(self, X, **params): + def predict(self, X): """Predict the class labels for the provided data Parameters @@ -225,7 +223,6 @@ def predict(self, X, **params): List of class labels (one for each data sample). 
""" X = atleast2d_or_csr(X) - self._set_params(**params) # get neighbors neigh_ind = self.kneighbors(X, return_distance=False) @@ -295,7 +292,7 @@ def __init__(self, n_neighbors=5, mode='mean', algorithm='auto', self.mode = mode self.algorithm = algorithm - def predict(self, X, **params): + def predict(self, X): """Predict the target for the provided data Parameters @@ -313,7 +310,6 @@ def predict(self, X, **params): List of target values (one for each data sample). """ X = atleast2d_or_csr(X) - self._set_params(**params) # compute nearest neighbors neigh_ind = self.kneighbors(X, return_distance=False) diff --git a/scikits/learn/pls.py b/scikits/learn/pls.py index 126918ed1b8ae..ab7602ed5a062 100644 --- a/scikits/learn/pls.py +++ b/scikits/learn/pls.py @@ -201,8 +201,7 @@ def __init__(self, n_components=2, deflation_mode="canonical", mode="A", self.tol = tol self.copy = copy - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): # copy since this will contains the residuals (deflated) matrices if self.copy: X = np.asanyarray(X, dtype=np.float).copy() @@ -472,8 +471,8 @@ class PLSRegression(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> pls2 = PLSRegression() - >>> pls2.fit(X, Y, n_components=2) + >>> pls2 = PLSRegression(n_components=2) + >>> pls2.fit(X, Y) PLSRegression(scale=True, algorithm='nipals', max_iter=500, n_components=2, tol=1e-06, copy=True) >>> Y_pred = pls2.predict(X) @@ -577,8 +576,8 @@ class PLSCanonical(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> plsca = PLSCanonical() - >>> plsca.fit(X, Y, n_components=2) + >>> plsca = PLSCanonical(n_components=2) + >>> plsca.fit(X, Y) PLSCanonical(scale=True, algorithm='nipals', max_iter=500, n_components=2, tol=1e-06, copy=True) >>> X_c, Y_c = plsca.transform(X, Y) @@ -685,8 +684,8 @@ class CCA(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> cca = CCA() - >>> cca.fit(X, Y, n_components=1) + >>> cca = CCA(n_components=1) + >>> cca.fit(X, Y) CCA(scale=True, algorithm='nipals', max_iter=500, n_components=1, tol=1e-06, copy=True) >>> X_c, Y_c = cca.transform(X, Y) @@ -762,8 +761,7 @@ def __init__(self, n_components=2, scale=True, copy=True): self.n_components = n_components self.scale = scale - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): # copy since this will contains the centered data if self.copy: X = np.asanyarray(X).copy() diff --git a/scikits/learn/qda.py b/scikits/learn/qda.py index e709d3628b8bd..b275410dc50a0 100644 --- a/scikits/learn/qda.py +++ b/scikits/learn/qda.py @@ -63,7 +63,7 @@ class QDA(BaseEstimator, ClassifierMixin): def __init__(self, priors=None): self.priors = np.asarray(priors) if priors is not None else None - def fit(self, X, y, store_covariances=False, tol=1.0e-4, **params): + def fit(self, X, y, store_covariances=False, tol=1.0e-4): """ Fit the QDA model according to the given training data and parameters. @@ -78,7 +78,6 @@ def fit(self, X, y, store_covariances=False, tol=1.0e-4, **params): If True the covariance matrices are computed and stored in the self.covariances_ attribute. 
""" - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) if X.ndim != 2: diff --git a/scikits/learn/svm/base.py b/scikits/learn/svm/base.py index 67883ac94000a..ed9a1d259194b 100644 --- a/scikits/learn/svm/base.py +++ b/scikits/learn/svm/base.py @@ -71,8 +71,7 @@ def _compute_kernel(self, X): dtype=np.float64, order='C') return X - def fit(self, X, y, class_weight=None, sample_weight=None, cache_size=100., - **params): + def fit(self, X, y, class_weight=None, sample_weight=None, cache_size=100.): """ Fit the SVM model according to the given training data and parameters. @@ -111,7 +110,6 @@ class frequencies. copied. """ - self._set_params(**params) X = np.asanyarray(X, dtype=np.float64, order='C') y = np.asanyarray(y, dtype=np.float64, order='C') @@ -349,7 +347,7 @@ def _get_solver_type(self): + solver_type) return self._solver_type_dict[solver_type] - def fit(self, X, y, class_weight=None, **params): + def fit(self, X, y, class_weight=None): """ Fit the model according to the given training data and parameters. @@ -372,7 +370,6 @@ def fit(self, X, y, class_weight=None, **params): self : object Returns self. """ - self._set_params(**params) self.class_weight, self.class_weight_label = \ _get_class_weight(class_weight, y) diff --git a/scikits/learn/svm/sparse/base.py b/scikits/learn/svm/sparse/base.py index 6bc983719a2cd..df8a822863d73 100644 --- a/scikits/learn/svm/sparse/base.py +++ b/scikits/learn/svm/sparse/base.py @@ -47,8 +47,7 @@ def __init__(self, impl, kernel, degree, gamma, coef0, # only used in classification self.n_support_ = np.empty(0, dtype=np.int32, order='C') - def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100., - **params): + def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100.): """ Fit the SVM model according to the given training data and parameters. @@ -84,7 +83,6 @@ def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100., For maximum effiency, use a sparse matrix in csr format (scipy.sparse.csr_matrix) """ - self._set_params(**params) import scipy.sparse X = scipy.sparse.csr_matrix(X) @@ -222,7 +220,7 @@ def predict_proba(self, X): class SparseBaseLibLinear(BaseLibLinear): - def fit(self, X, y, class_weight=None, **params): + def fit(self, X, y, class_weight=None): """ Fit the model using X, y as training data. @@ -239,7 +237,6 @@ def fit(self, X, y, class_weight=None, **params): self : object Returns an instance of self. 
""" - self._set_params(**params) import scipy.sparse X = scipy.sparse.csr_matrix(X) diff --git a/scikits/learn/tests/test_neighbors.py b/scikits/learn/tests/test_neighbors.py index 14567e4dac5ad..66a459ab95757 100644 --- a/scikits/learn/tests/test_neighbors.py +++ b/scikits/learn/tests/test_neighbors.py @@ -101,16 +101,17 @@ def test_neighbors_iris(): """ for s in ('auto', 'ball_tree', 'brute', 'inplace'): - clf = neighbors.NeighborsClassifier() - clf.fit(iris.data, iris.target, n_neighbors=1, algorithm=s) + clf = neighbors.NeighborsClassifier(n_neighbors=1, algorithm=s) + clf.fit(iris.data, iris.target) assert_array_equal(clf.predict(iris.data), iris.target) - clf.fit(iris.data, iris.target, n_neighbors=9, algorithm=s) + clf.set_params(n_neighbors=9, algorithm=s) + clf.fit(iris.data, iris.target) assert np.mean(clf.predict(iris.data) == iris.target) > 0.95 for m in ('barycenter', 'mean'): - rgs = neighbors.NeighborsRegressor() - rgs.fit(iris.data, iris.target, mode=m, algorithm=s) + rgs = neighbors.NeighborsRegressor(mode=m) + rgs.fit(iris.data, iris.target) assert np.mean( rgs.predict(iris.data).round() == iris.target) > 0.95 @@ -125,8 +126,8 @@ def _test_neighbors_sparse_regression(): for sparse1 in SPARSE_TYPES: for m in ('barycenter', 'mean'): - rgs = neighbors.NeighborsRegressor() - rgs.fit(sparse1(iris.data), iris.target, mode=m) + rgs = neighbors.NeighborsRegressor(mode=m) + rgs.fit(sparse1(iris.data), iris.target) for sparse2 in SPARSE_OR_DENSE: data2 = sparse2(iris.data) assert np.mean(rgs.predict(data2).round() == iris.target) > 0.95 diff --git a/scikits/learn/tests/test_pls.py b/scikits/learn/tests/test_pls.py index 75f9c4e506877..e66ea38dac226 100644 --- a/scikits/learn/tests/test_pls.py +++ b/scikits/learn/tests/test_pls.py @@ -14,10 +14,10 @@ def test_pls(): # =========================================================== # Compare 2 algo.: nipals vs. 
svd # ------------------------------ - pls_bynipals = pls.PLSCanonical() - pls_bynipals.fit(X, Y, n_components=n_components) - pls_bysvd = pls.PLSCanonical(algorithm="svd") - pls_bysvd.fit(X, Y, n_components=n_components) + pls_bynipals = pls.PLSCanonical(n_components=n_components) + pls_bynipals.fit(X, Y) + pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=n_components) + pls_bysvd.fit(X, Y) # check that the loading vectors are highly correlated assert_array_almost_equal( [np.abs(np.corrcoef(pls_bynipals.x_loadings_[:, k], @@ -35,8 +35,8 @@ def test_pls(): # Check PLS properties (with n_components=X.shape[1]) # --------------------------------------------------- - plsca = pls.PLSCanonical() - plsca.fit(X, Y, n_components=X.shape[1]) + plsca = pls.PLSCanonical(n_components=X.shape[1]) + plsca.fit(X, Y) T = plsca.x_scores_ P = plsca.x_loadings_ Wx = plsca.x_weights_ @@ -81,8 +81,8 @@ def check_ortho(M, err_msg): # "Non regression test" on canonical PLS # -------------------------------------- - pls_bynipals = pls.PLSCanonical() - pls_bynipals.fit(X, Y, n_components=n_components) + pls_bynipals = pls.PLSCanonical(n_components=n_components) + pls_bynipals.fit(X, Y) pls_ca = pls_bynipals x_loadings = np.array( @@ -111,8 +111,8 @@ def check_ortho(M, err_msg): # 2) Regression PLS (PLS2): "Non regression test" # =============================================== - pls2 = pls.PLSRegression() - pls2.fit(X, Y, n_components=n_components) + pls2 = pls.PLSRegression(n_components=n_components) + pls2.fit(X, Y) x_loadings = np.array( [[-0.66591531, -0.01976472], From c78908a3a36243206e8d1c6d8dbd155d85168784 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 23 Aug 2011 16:56:36 +0200 Subject: [PATCH 38/51] checkpoint style improvements for the KMeans predict --- examples/cluster/vq_china.py | 52 +++++++++--------- scikits/learn/cluster/k_means_.py | 20 +++---- scikits/learn/cluster/tests/test_k_means.py | 61 ++++++++++++++++----- 3 files changed, 82 insertions(+), 51 deletions(-) diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py index c0b048af1f932..d2a296d880823 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/vq_china.py @@ -4,15 +4,15 @@ Vector Quantization of Lena using k-means ========================================= -Performs a Vector Quatization of an image, reducing the -number of colors required to show the image. +Performs a Vector Quantization of an image, reducing the number of colors +required to show the image. """ print __doc__ -import os import numpy as np import pylab as pl from scikits.learn.cluster import KMeans from scikits.learn.datasets import load_sample_images +from scikits.learn.utils import shuffle # Get all sample images and obtain just china.jpg sample_image_name = "china.jpg" @@ -28,42 +28,40 @@ # Load Image and transform to a 2D numpy array. w, h, d = original_shape = tuple(image_data.shape) -image_array = np.reshape(image_data, (w * h, 3)) +assert d == 3 +image_array = np.reshape(image_data, (w * h, d)) -# Take a sample of the data. -sample_indices = range(len(image_array)) -np.random.shuffle(sample_indices) -sample_indices = sample_indices[:int(len(image_array) * 0.2)] -sample_data = image_array[sample_indices] +print "Fitting estimator on a sub sample of the data" +image_array_sample = shuffle(image_array, random_state=0)[:1000] +kmeans = KMeans(k=10, max_iter=1000).fit(image_array_sample) +print "done." -# Perform Vector Quantisation with 256 clusters. 
-k = 256 -kmeans = KMeans(k=k) -kmeans.fit(sample_data) # Get labels for all points +print "Predicting labels:" labels = kmeans.predict(image_array) -# Save the reduced dataset. Only the centroids and labels need to be saved. -reduced_image = (kmeans.cluster_centers_, labels) +print "done." -def recreate_image(centroids, labels, w, h): - # Recreates the (compressed) image from centroids, labels and dimensions - d = len(centroids[0]) +def recreate_image(codebook, labels, w, h): + # Recreates the (compressed) image from the code book, labels and dimensions + d = codebook.shape[1] image = np.zeros((w, h, d)) - label_num = 0 + label_idx = 0 for i in range(w): for j in range(h): - image[i][j] = centroids[labels[label_num]] - label_num += 1 - print np.histogram(labels) - print set(labels) + image[i][j] = codebook[labels[label_idx]] + label_idx += 1 return image # Display all results, alongside original image pl.figure() -ax = pl.axes([0,0,1,1], frameon=False) +ax = pl.axes([0, 0, 1, 1], frameon=False) ax.set_axis_off() -centroids, labels = reduced_image -im = pl.imshow(recreate_image(centroids, labels, w, h)) +pl.imshow(image_data) -pl.show() +pl.figure() +ax = pl.axes([0, 0, 1, 1], frameon=False) +ax.set_axis_off() +pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) + +pl.show() diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index f5056dbcfbcee..2c5eda4a94979 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -229,7 +229,7 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0, for i in range(max_iter): centers_old = centers.copy() labels, inertia = _e_step(X, centers, - x_squared_norms=x_squared_norms) + x_squared_norms=x_squared_norms) centers = _m_step(X, labels, k) if verbose: @@ -537,7 +537,7 @@ def transform(self, X): return euclidean_distances(X, self.cluster_centers_) def predict(self, X): - """ Predict the closest cluster each sample belongs to + """Predict the closest cluster each sample in X belongs to. In the vector quantization literature, `cluster_centers_` is called the code book and each value returned by `predict` is the index of @@ -550,17 +550,17 @@ def predict(self, X): Returns ------- - Y : array, shape [n_samples, ] + Y : array, shape [n_samples,] Index of the closest center each sample belongs to. """ if not hasattr(self, "cluster_centers_"): raise AttributeError("Model has not been trained yet. " "Fit k-means before using predict.") - cluster_shape = self.cluster_centers_.shape[1] - if not X.shape[1] == cluster_shape: + expected_n_features = self.cluster_centers_.shape[1] + if not X.shape[1] == expected_n_features: raise ValueError("Incorrect number of features. " - "Got %d features, expected %d" % (X.shape[1], - cluster_shape)) + "Got %d features, expected %d" % ( + X.shape[1], expected_n_features)) return _e_step(X, self.cluster_centers_)[0] @@ -633,8 +633,7 @@ def _mini_batch_step_sparse(X, batch_slice, centers, counts, x_squared_norms): class MiniBatchKMeans(KMeans): - """ - Mini-Batch K-Means clustering + """Mini-Batch K-Means clustering Parameters ---------- @@ -703,8 +702,7 @@ def __init__(self, k=8, init='random', max_iter=100, self.chunk_size = chunk_size def fit(self, X, y=None, **params): - """ - Calculates the centroids on a batch X + """Compute the centroids on X by chunking it into mini-batches. 
Parameters ---------- diff --git a/scikits/learn/cluster/tests/test_k_means.py b/scikits/learn/cluster/tests/test_k_means.py index e17c739340080..76fab2af1129e 100644 --- a/scikits/learn/cluster/tests/test_k_means.py +++ b/scikits/learn/cluster/tests/test_k_means.py @@ -14,7 +14,7 @@ n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters, std=.1) -S = sp.csr_matrix(X) +X_csr = sp.csr_matrix(X) def test_k_means_pp_init(): @@ -49,14 +49,14 @@ def test_mini_batch_k_means_pp_init(): def test_sparse_mini_batch_k_means_pp_init(): np.random.seed(1) - sample = S[0:S.shape[0] / 2] + sample = X_csr[0:X_csr.shape[0] / 2] km = MiniBatchKMeans(init="random").partial_fit(sample) # Let's recalculate the inertia on the whole dataset - km.partial_fit(S) + km.partial_fit(X_csr) inertia = km.inertia_ - km.partial_fit(S[S.shape[0] / 2:]) + km.partial_fit(X_csr[X_csr.shape[0] / 2:]) # And again - km.partial_fit(S) + km.partial_fit(X_csr) assert(km.inertia_ < inertia) @@ -147,7 +147,7 @@ def test_mbk_means_fixed_array_init(): def test_sparse_mbk_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=init_array).fit(S) + mbk_means = MiniBatchKMeans(init=init_array).fit(X_csr) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -161,7 +161,7 @@ def test_sparse_mbk_means_fixed_array_init(): def test_sparse_mbk_means_pp_init(): np.random.seed(1) mbk_means = MiniBatchKMeans(init="k-means++") - assert_raises(ValueError, mbk_means.fit, S, k=n_clusters) + assert_raises(ValueError, mbk_means.fit, X_csr, k=n_clusters) def test_sparse_mbk_means_callable_init(): @@ -169,7 +169,7 @@ def test_sparse_mbk_means_callable_init(): def test_init(Xbar, k, random_state): return np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=test_init).fit(S) + mbk_means = MiniBatchKMeans(init=test_init).fit(X_csr) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -209,12 +209,13 @@ def test_mbk_means(): true_labels = np.zeros((n_samples,), dtype=np.int32) true_labels[20:40] = 1 true_labels[40:] = 2 - chunk_size = n_samples * 2 + chunk_size = n_samples / 10 # make sure init clusters are in different clusters init_array = np.vstack([X[5], X[25], X[45]]) # shuffle original data - X_shuffled, S_shuffled, true_labels = shuffle(X, S, true_labels, random_state=1) + X_shuffled, X_csr_shuffled, true_labels = shuffle(X, X_csr, true_labels, + random_state=1) mbk_means = MiniBatchKMeans(init=init_array, chunk_size=chunk_size, k=n_clusters, random_state=1) @@ -223,16 +224,50 @@ def test_mbk_means(): mbk_means = MiniBatchKMeans(init=init_array, chunk_size=chunk_size, k=n_clusters, random_state=1) - mbk_means.fit(S_shuffled) + mbk_means.fit(X_csr_shuffled) assert_equal(true_labels, mbk_means.labels_) def test_predict(): - k_means = KMeans(k=n_clusters) - k_means.fit(X) + k_means = KMeans(k=n_clusters).fit(X) + + # sanity check: predict centroid labels pred = k_means.predict(k_means.cluster_centers_) assert_array_equal(pred, np.arange(n_clusters)) + # sanity check: re-predict labeling for training set samples + pred = k_means.predict(X) + assert_array_equal(k_means.predict(X), k_means.labels_) + + +def test_predict_minibatch(): + mbk_means = MiniBatchKMeans(k=n_clusters).fit(X) + + # sanity check: predict centroid labels + pred = mbk_means.predict(mbk_means.cluster_centers_) + assert_array_equal(pred, np.arange(n_clusters)) + + # sanity check: re-predict labeling for training set samples + pred = 
mbk_means.predict(X) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + + +def test_predict_minibatch_sparse_input(): + mbk_means = MiniBatchKMeans(k=n_clusters).fit(X_csr) + + # sanity check: predict centroid labels + pred = mbk_means.predict(mbk_means.cluster_centers_) + assert_array_equal(pred, np.arange(n_clusters)) + + # sanity check: re-predict labeling for training set samples + pred = mbk_means.predict(X_csr) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + + # check that models trained on sparse input also works for dense input at + # predict time + pred = mbk_means.predict(X) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + def test_transform(): k_means = KMeans(k=n_clusters) From 9ff1929342965bbaeea12c4b7911c8e1d00804e3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 23 Aug 2011 18:25:26 +0200 Subject: [PATCH 39/51] time the main operations --- examples/cluster/vq_china.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py index d2a296d880823..372c210327873 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/vq_china.py @@ -13,6 +13,7 @@ from scikits.learn.cluster import KMeans from scikits.learn.datasets import load_sample_images from scikits.learn.utils import shuffle +from time import time # Get all sample images and obtain just china.jpg sample_image_name = "china.jpg" @@ -31,15 +32,17 @@ assert d == 3 image_array = np.reshape(image_data, (w * h, d)) -print "Fitting estimator on a sub sample of the data" +print "Fitting estimator on a small sub-sample of the data" +t0 = time() image_array_sample = shuffle(image_array, random_state=0)[:1000] kmeans = KMeans(k=10, max_iter=1000).fit(image_array_sample) -print "done." +print "done in %0.3fs." % (time() - t0) # Get labels for all points -print "Predicting labels:" +print "Predicting labels on the full image" +t0 = time() labels = kmeans.predict(image_array) -print "done." +print "done in %0.3fs." % (time() - t0) def recreate_image(codebook, labels, w, h): # Recreates the (compressed) image from the code book, labels and dimensions @@ -63,5 +66,4 @@ def recreate_image(codebook, labels, w, h): ax.set_axis_off() pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) - pl.show() From d93991494fe215bdd244fd4667d4c2d171f20d6a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 23 Aug 2011 18:28:29 +0200 Subject: [PATCH 40/51] add warning utils and use it in KMeans when data matrix is integers, boolean, complex... --- scikits/learn/cluster/k_means_.py | 11 +++++++---- scikits/learn/utils/__init__.py | 9 +++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index 3f653a0f7b3c8..d709657caba5a 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -16,10 +16,11 @@ from ..base import BaseEstimator from ..metrics.pairwise import euclidean_distances -from ..utils import check_random_state from ..utils import check_arrays -from ..utils import shuffle +from ..utils import check_random_state from ..utils import gen_even_slices +from ..utils import shuffle +from ..utils import warn_if_not_float from . 
import _k_means @@ -493,11 +494,12 @@ def _check_data(self, X): X.shape[0], self.k)) return X - def fit(self, X): + def fit(self, X, y=None): """Compute k-means""" self.random_state = check_random_state(self.random_state) X = self._check_data(X) + warn_if_not_float(X, self) self.cluster_centers_, self.labels_, self.inertia_ = k_means( X, k=self.k, init=self.init, n_init=self.n_init, @@ -505,7 +507,7 @@ def fit(self, X): tol=self.tol, random_state=self.random_state, copy_x=self.copy_x) return self - def transform(self, X): + def transform(self, X, y=None): """ Transform the data to a cluster-distance space In the new space, each dimension is the distance to the cluster centers. @@ -707,6 +709,7 @@ def fit(self, X, y=None): """ self.random_state = check_random_state(self.random_state) X = check_arrays(X, sparse_format="csr", copy=False)[0] + warn_if_not_float(X, self) n_samples, n_features = X.shape if n_samples < self.k: raise ValueError("Number of samples smaller than number "\ diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index ec31dc4e61da8..938bae7ce6470 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -112,6 +112,15 @@ def check_arrays(*arrays, **options): return checked_arrays +def warn_if_not_float(X, estimator='This algorithm'): + """Warning utility function to check that data type is floating point""" + if not isinstance(estimator, basestring): + estimator = estimator.__class__.__name__ + if X.dtype.kind != 'f': + warnings.warn("%s assumes floating point values as input, " + "got %s" % (estimator, X.dtype)) + + class deprecated(object): """Decorator to mark a function or class as deprecated. From 2faa1099b86b4606d21084fb0070dfe41b1265fa Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Tue, 23 Aug 2011 19:03:12 +0200 Subject: [PATCH 41/51] FIX : Ellipsis in least_angle.py doctests --- scikits/learn/linear_model/least_angle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 6a03a35209e66..b7a0e60fcdd63 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -348,7 +348,7 @@ class Lars(LinearModel): >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) - >>> print clf.coef_ + >>> print clf.coef_ # doctest : +ELLIPSIS [ 0. -1.] References @@ -486,7 +486,7 @@ class LassoLars(Lars): LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, overwrite_X=False) - >>> print clf.coef_ + >>> print clf.coef_ # doctest : +ELLIPSIS [ 0. -0.96325765] References @@ -898,7 +898,7 @@ class LassoLarsIC(LassoLars): LassoLarsIC(normalize=True, verbose=False, fit_intercept=True, max_iter=500, eps=... precompute='auto', criterion='bic', overwrite_X=False) - >>> print clf.coef_ + >>> print clf.coef_ # doctest: +ELLIPSIS [ 0. -1.] 
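The ``# doctest: +ELLIPSIS`` directive added to these examples tells doctest to treat ``...`` in the expected output as a wildcard that can match any substring of the actual output (for instance, the abbreviated ``eps=...`` in the ``LassoLarsIC`` example above). A self-contained illustration of the flag, in plain Python 2 and not taken from the patch::

    >>> print 'epsilon = %r' % 2.2204460492503131e-16  # doctest: +ELLIPSIS
    epsilon = 2.22...e-16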
References From 47458beb9ad1ff8f7fa4bc9e494a1157d5dcb90a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 23 Aug 2011 19:33:22 +0200 Subject: [PATCH 42/51] checkpointing work in progress on VQ example --- examples/cluster/vq_china.py | 34 ++++++++++++------------ scikits/learn/datasets/__init__.py | 1 + scikits/learn/datasets/base.py | 42 +++++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py index 372c210327873..5c7c86498994a 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/vq_china.py @@ -10,38 +10,35 @@ print __doc__ import numpy as np import pylab as pl +from scikits.learn.preprocessing import Scaler from scikits.learn.cluster import KMeans -from scikits.learn.datasets import load_sample_images +from scikits.learn.datasets import load_sample_image from scikits.learn.utils import shuffle from time import time # Get all sample images and obtain just china.jpg -sample_image_name = "china.jpg" -sample_images = load_sample_images() -index = None -for i, filename in enumerate(sample_images.filenames): - if filename.endswith(sample_image_name): - index = i - break -if index is None: - raise AttributeError("Cannot find sample image: %s" % sample_image_name) -image_data = sample_images.images[index] +china = load_sample_image("china.jpg") + +# Convert to floats instead of the default 8 bits integer coding +china = np.array(china, dtype=np.float64) # Load Image and transform to a 2D numpy array. -w, h, d = original_shape = tuple(image_data.shape) +w, h, d = original_shape = tuple(china.shape) assert d == 3 -image_array = np.reshape(image_data, (w * h, d)) +image_array = np.reshape(china, (w * h, d)) +scaler = Scaler() +image_scaled = scaler.fit_transform(image_array) print "Fitting estimator on a small sub-sample of the data" t0 = time() -image_array_sample = shuffle(image_array, random_state=0)[:1000] -kmeans = KMeans(k=10, max_iter=1000).fit(image_array_sample) +image_scaled_sample = shuffle(image_scaled, random_state=0)[:1000] +kmeans = KMeans(k=10, max_iter=1000).fit(image_scaled_sample) print "done in %0.3fs." % (time() - t0) # Get labels for all points print "Predicting labels on the full image" t0 = time() -labels = kmeans.predict(image_array) +labels = kmeans.predict(image_scaled) print "done in %0.3fs." 
% (time() - t0) def recreate_image(codebook, labels, w, h): @@ -59,11 +56,14 @@ def recreate_image(codebook, labels, w, h): pl.figure() ax = pl.axes([0, 0, 1, 1], frameon=False) ax.set_axis_off() -pl.imshow(image_data) +pl.imshow(china) pl.figure() ax = pl.axes([0, 0, 1, 1], frameon=False) ax.set_axis_off() +# transform back the images in the original space +codebook = scaler.inverse_transform(kmeans.cluster_centers_) + pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) pl.show() diff --git a/scikits/learn/datasets/__init__.py b/scikits/learn/datasets/__init__.py index 9d06df2a0c9ba..4fec421c4ea9d 100644 --- a/scikits/learn/datasets/__init__.py +++ b/scikits/learn/datasets/__init__.py @@ -7,6 +7,7 @@ from .base import get_data_home from .base import clear_data_home from .base import load_sample_images +from .base import load_sample_image from .mlcomp import load_mlcomp from .lfw import load_lfw_pairs from .lfw import load_lfw_people diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py index 6077374cc0917..eac1aea41f9e7 100644 --- a/scikits/learn/datasets/base.py +++ b/scikits/learn/datasets/base.py @@ -10,7 +10,6 @@ import os import csv import shutil -import textwrap from os import environ from os.path import dirname from os.path import join @@ -348,9 +347,8 @@ def load_boston(): feature_names=feature_names, DESCR=fdescr.read()) - def load_sample_images(): - """ Load sample images for image manipulation. + """Load sample images for image manipulation. Return ------ @@ -366,11 +364,18 @@ def load_sample_images(): To load the data and visualize the images:: >>> from scikits.learn.datasets import load_sample_images - >>> images = load_sample_images() + >>> dataset = load_sample_images() + >>> len(dataset.images) + 2 + >>> first_img_data = dataset.images[0] + >>> first_img_data.shape # height, width, channels + (427, 640, 3) + >>> first_img_data.dtype + dtype('uint8') >>> # import pylab as pl >>> # pl.gray() - >>> # pl.matshow(images.images[0]) # Visualize the first image + >>> # pl.matshow(dataset.images[0]) # Visualize the first image >>> # pl.show() """ # Try to import Image and imresize from PIL. We do this here to prevent @@ -391,7 +396,32 @@ def load_sample_images(): # Load image data for each image in the source folder. 
images = [np.asarray(Image.open(filename)) for filename in filenames] + return Bunch(images=images, filenames=filenames, DESCR=descr) - + +def load_sample_image(image_name): + """Load the numpy array of a single sample image + + >>> china = load_sample_image('china.jpg') + >>> china.dtype + dtype('uint8') + >>> china.shape + (427, 640, 3) + + >>> flower = load_sample_image('flower.jpg') + >>> flower.dtype + dtype('uint8') + >>> flower.shape + (427, 640, 3) + """ + images = load_sample_images() + index = None + for i, filename in enumerate(images.filenames): + if filename.endswith(image_name): + index = i + break + if index is None: + raise AttributeError("Cannot find sample image: %s" % image_name) + return images.images[index] From 6575434e30d2b73d2466748787ef37bf38fb3856 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Tue, 23 Aug 2011 13:26:23 -0400 Subject: [PATCH 43/51] Style + typos --- scikits/learn/utils/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index e5185aea904d2..d14e61247d891 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -27,24 +27,24 @@ def as_float_array(X, overwrite_X=False): """ Converts a numpy array to type np.float - The new dtype will be float32 or np.float64, - depending on the original type. + The new dtype will be float32 or np.float64, depending on the original type. The function can create a copy or modify the argument depending of the argument overwrite_X - - WARNING : If X is not of type float, then a copy of X with the wright type + + WARNING : If X is not of type float, then a copy of X with the right type will be returned Parameters ---------- - X : numpy array + X : array - overwrite_X : bool, optional + overwrite_X : bool, optional if False, a copy of X will be created Returns ------- - An array X of type np.float + X : array + An array of type np.float """ if X.dtype in [np.float32, np.float64]: if overwrite_X: From f2b0c614acd4f5fbcc5f0d715eef816b617a46a2 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Tue, 23 Aug 2011 17:52:23 -0400 Subject: [PATCH 44/51] API : adding proper normalize options in Lasso and ElasticNet with clean up ENH : fix doctests --- .../learn/linear_model/coordinate_descent.py | 122 ++++++++++++------ scikits/learn/linear_model/least_angle.py | 17 ++- scikits/learn/linear_model/omp.py | 14 +- .../linear_model/sparse/coordinate_descent.py | 13 +- .../linear_model/tests/test_least_angle.py | 21 +-- 5 files changed, 121 insertions(+), 66 deletions(-) diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index 017cc2ca221f2..f11498a61b17a 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -80,7 +80,6 @@ class ElasticNet(LinearModel): alpha = a + b and rho = a/(a+b) """ - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, normalize=False, precompute='auto', max_iter=1000, overwrite_X=False, tol=1e-4): @@ -123,15 +122,23 @@ def fit(self, X, y, Xy=None, coef_init=None): y = np.asanyarray(y, dtype=np.float64) X = as_float_array(X, self.overwrite_X) - X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize) + n_samples, n_features = X.shape + + X_init = X + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) + precompute = self.precompute + if X_init is not X and 
hasattr(precompute, '__array__'): + precompute = 'auto' # recompute Gram + if X_init is not X and Xy is not None: + Xy = None # recompute Xy if coef_init is None: - self.coef_ = np.zeros(X.shape[1], dtype=np.float64) + self.coef_ = np.zeros(n_features, dtype=np.float64) else: self.coef_ = coef_init - n_samples = X.shape[0] alpha = self.alpha * self.rho * n_samples beta = self.alpha * (1.0 - self.rho) * n_samples @@ -139,9 +146,9 @@ def fit(self, X, y, Xy=None, coef_init=None): # precompute if n_samples > n_features if hasattr(self.precompute, '__array__'): - Gram = self.precompute - elif self.precompute == True or \ - (self.precompute == 'auto' and X.shape[0] > X.shape[1]): + Gram = precompute + elif precompute == True or \ + (precompute == 'auto' and n_samples > n_features): Gram = np.dot(X.T, X) else: Gram = None @@ -242,17 +249,21 @@ class Lasso(ElasticNet): """ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, - precompute='auto', overwrite_X=False, max_iter=1000, tol=1e-4): + precompute='auto', overwrite_X=False, max_iter=1000, + tol=1e-4): super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept, precompute=precompute, - max_iter=max_iter, overwrite_X=overwrite_X, tol=tol) + fit_intercept=fit_intercept, normalize=normalize, + precompute=precompute, overwrite_X=overwrite_X, + max_iter=max_iter, tol=tol) ############################################################################### # Classes to store linear models along a regularization path -def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - normalize=False, overwrite_X=False, verbose=False, **fit_params): +def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, + precompute='auto', Xy=None, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False, + **params): """Compute Lasso path with coordinate descent Parameters @@ -275,8 +286,17 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, List of alphas where to compute the models. If None alphas are set automatically - fit_params : kwargs - keyword arguments passed to the Lasso fit method + precompute : True | False | 'auto' | array-like + Whether to use a precomputed Gram matrix to speed up + calculations. If set to 'auto' let us decide. The Gram + matrix can also be passed as argument. + + Xy : array-like, optional + Xy = np.dot(X.T, y) that can be precomputed. It is useful + only when the Gram matrix is precomuted. + + fit_intercept : bool + Fit or not an intercept normalize : boolean, optional If True, the regressors X are normalized @@ -285,6 +305,12 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, If True, X will not be copied Default is False + verbose : bool + Verbose computation or not. + + params : kwargs + keyword arguments passed to the Lasso objects + Returns ------- models : a list of models along the regularization path @@ -297,13 +323,15 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, should be directly passed as a fortran contiguous numpy array. 
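As a quick illustration of the updated ``lasso_path`` signature documented
above, here is a minimal hypothetical sketch (the toy data and grid size are
made up; ``enet_path`` is used the same way with an additional ``rho``)::

    import numpy as np
    from scikits.learn.linear_model import lasso_path

    rng = np.random.RandomState(0)
    # fortran-ordered input avoids an extra copy, as recommended above
    X = np.asfortranarray(rng.randn(60, 10))
    y = np.dot(X[:, :3], [1., -2., 0.5]) + 0.05 * rng.randn(60)

    # one fitted model per alpha along the lasso path; with
    # n_samples > n_features the Gram matrix is precomputed
    # automatically (precompute='auto')
    models = lasso_path(X, y, n_alphas=30)
    alphas = np.array([model.alpha for model in models])
    coefs = np.array([model.coef_ for model in models])
    print coefs.shape   # (30, 10): one coefficient vector per alpha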
""" return enet_path(X, y, rho=1., eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, normalize=normalize, - verbose=verbose, overwrite_X=overwrite_X, **fit_params) + precompute='auto', Xy=None, + fit_intercept=fit_intercept, normalize=normalize, + overwrite_X=overwrite_X, verbose=verbose, **params) def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, normalize=False, verbose=False, - overwrite_X=False, **fit_params): + precompute='auto', Xy=None, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False, + **params): """Compute Elastic-Net path with coordinate descent Parameters @@ -330,8 +358,17 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, List of alphas where to compute the models. If None alphas are set automatically - fit_params : kwargs - keyword arguments passed to the Lasso fit method + precompute : True | False | 'auto' | array-like + Whether to use a precomputed Gram matrix to speed up + calculations. If set to 'auto' let us decide. The Gram + matrix can also be passed as argument. + + Xy : array-like, optional + Xy = np.dot(X.T, y) that can be precomputed. It is useful + only when the Gram matrix is precomuted. + + fit_intercept : bool + Fit or not an intercept normalize : boolean, optional If True, the regressors X are normalized @@ -340,6 +377,12 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, If True, X will not be copied Default is False + verbose : bool + Verbose computation or not. + + params : kwargs + keyword arguments passed to the Lasso objects + Returns ------- models : a list of models along the regularization path @@ -349,31 +392,40 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, See examples/plot_lasso_coordinate_descent_path.py for an example. """ X = as_float_array(X, overwrite_X) + + X_init = X X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, - fit_intercept, normalize) + fit_intercept, + normalize) X = np.asfortranarray(X) # make data contiguous in memory + n_samples, n_features = X.shape + + if X_init is not X and hasattr(precompute, '__array__'): + precompute = 'auto' + if X_init is not X and Xy is not None: + Xy = None + + if 'precompute' is True or \ + ((precompute == 'auto') and (n_samples > n_features)): + precompute = np.dot(X.T, X) + + if Xy is None: + Xy = np.dot(X.T, y) n_samples = X.shape[0] if alphas is None: - alpha_max = np.abs(np.dot(X.T, y)).max() / (n_samples * rho) + alpha_max = np.abs(Xy).max() / (n_samples * rho) alphas = np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max), num=n_alphas)[::-1] else: alphas = np.sort(alphas)[::-1] # make sure alphas are properly ordered coef_ = None # init coef_ models = [] - Xy = None - - if not 'precompute' in fit_params \ - or fit_params['precompute'] is True \ - or (fit_intercept and hasattr(fit_params['precompute'], '__array__')): - fit_params['precompute'] = np.dot(X.T, X) - if not 'Xy' in fit_params or fit_params['Xy'] is None: - Xy = np.dot(X.T, y) for alpha in alphas: - model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False) - model.set_params(**fit_params) + model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False, + precompute=precompute) + model.set_params(**params) model.fit(X, y, coef_init=coef_, Xy=Xy) if fit_intercept: model.fit_intercept = True @@ -504,7 +556,6 @@ class LassoCV(LinearModelCV): To avoid unnecessary memory duplication the X argument of the fit method should be directly passed as a fortran contiguous numpy array. 
""" - path = staticmethod(lasso_path) estimator = Lasso @@ -549,7 +600,7 @@ class ElasticNetCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). - Specific crossvalidation objects can be passed, see + Specific crossvalidation objects can be passed, see scikits.learn.cross_val module for the list of possible objects @@ -577,7 +628,6 @@ class ElasticNetCV(LinearModelCV): alpha = a + b and rho = a/(a+b) """ - path = staticmethod(enet_path) estimator = ElasticNet diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 22eadcef476e2..e10a38182b8f1 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -24,7 +24,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, alpha_min=0, method='lar', overwrite_X=False, eps=np.finfo(np.float).eps, - overwrite_Gram=False, verbose=False, ): + overwrite_Gram=False, verbose=False): """Compute Least Angle Regression and LASSO path Parameters @@ -349,7 +349,7 @@ class Lars(LinearModel): Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) >>> print clf.coef_ # doctest : +ELLIPSIS - [ 0. -1.] + [ 0. -1.00000015] References ---------- @@ -405,8 +405,9 @@ def fit(self, X, y, overwrite_X=False): X = as_float_array(X, self.overwrite_X) - X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) alpha = getattr(self, 'alpha', 0.) if hasattr(self, 'n_nonzero_coefs'): alpha = 0. # n_nonzero_coefs parametrization takes priority @@ -485,7 +486,7 @@ class LassoLars(Lars): eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, overwrite_X=False) >>> print clf.coef_ # doctest : +ELLIPSIS - [ 0. -0.96325765] + [ 0. -0.96325779] References ---------- @@ -896,7 +897,7 @@ class LassoLarsIC(LassoLars): eps=... precompute='auto', criterion='bic', overwrite_X=False) >>> print clf.coef_ # doctest: +ELLIPSIS - [ 0. -1.] + [ 0. -1.00000015] References ---------- @@ -947,7 +948,9 @@ def fit(self, X, y, overwrite_X=False): y = np.atleast_1d(y) X = as_float_array(X, self.overwrite_X) - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, normalize=self.normalize) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, + self.fit_intercept, + normalize=self.normalize) max_iter = self.max_iter Gram = self._get_gram() diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index fa4e73509e4d1..3c6e7bda6d603 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -461,7 +461,6 @@ class OrthogonalMatchingPursuit(LinearModel): LassoLars """ - def __init__(self, overwrite_X=False, overwrite_gram=False, overwrite_Xy=False, n_nonzero_coefs=None, eps=None, fit_intercept=True, normalize=True, precompute_gram=False): @@ -475,8 +474,7 @@ def __init__(self, overwrite_X=False, overwrite_gram=False, self.overwrite_X = overwrite_X self.eps = eps - def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, - overwrite_gram=False, overwrite_xy=False): + def fit(self, X, y, Gram=None, Xy=None): """Fit the model using X, y as training data. 
Parameters @@ -505,8 +503,9 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, n_features = X.shape[1] X = as_float_array(X, self.overwrite_X) - X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, - self.normalize) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) if self.n_nonzero_coefs == None and self.eps is None: self.n_nonzero_coefs = int(0.1 * n_features) @@ -520,6 +519,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, else: Gram = np.asfortranarray(Gram) + overwrite_gram = self.overwrite_gram if y.shape[1] > 1: # subsequent targets will be affected overwrite_gram = False @@ -538,7 +538,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, Gram /= X_std Gram /= X_std[:, np.newaxis] - norms_sq = np.sum((y ** 2), axis=0) if self.eps is not None else None + norms_sq = np.sum(y ** 2, axis=0) if self.eps is not None else None self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs, self.eps, norms_sq, overwrite_gram, True).T @@ -547,7 +547,7 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, if precompute_gram == 'auto': precompute_gram = X.shape[0] > X.shape[1] self.coef_ = orthogonal_mp(X, y, self.n_nonzero_coefs, self.eps, - precompute_gram=precompute_gram, + precompute_gram=self.precompute_gram, overwrite_X=self.overwrite_X).T self._set_intercept(X_mean, y_mean, X_std) diff --git a/scikits/learn/linear_model/sparse/coordinate_descent.py b/scikits/learn/linear_model/sparse/coordinate_descent.py index ece4613000c8c..f8773481d5fd0 100644 --- a/scikits/learn/linear_model/sparse/coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/coordinate_descent.py @@ -40,9 +40,8 @@ class ElasticNet(LinearModel): The parameter rho corresponds to alpha in the glmnet R package while alpha corresponds to the lambda parameter in glmnet. """ - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False, - max_iter=1000, tol=1e-4, normalize=True): + normalize=False, max_iter=1000, tol=1e-4): if fit_intercept: raise NotImplementedError("fit_intercept=True is not implemented") self.alpha = alpha @@ -135,8 +134,8 @@ class Lasso(ElasticNet): data is assumed to be already centered. 
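A small hypothetical sketch of these sparse estimators (toy data, not part
of the patch); CSC input is the most efficient format for fitting::

    import numpy as np
    import scipy.sparse as sp
    from scikits.learn.linear_model.sparse import Lasso, ElasticNet

    # a tiny, mostly empty design matrix in CSC format
    X = sp.csc_matrix(np.array([[-1., 0.], [0., 0.], [1., 0.]]))
    y = [-1, 0, 1]

    clf = Lasso(alpha=0.1, max_iter=1000, tol=1e-4).fit(X, y)
    print clf.coef_     # l1-shrunk coefficients; the empty feature stays at 0

    enet = ElasticNet(alpha=0.1, rho=0.5, max_iter=1000, tol=1e-4).fit(X, y)
    print enet.predict(np.array([[2., 0.], [3., 0.], [4., 0.]]))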
""" - - def __init__(self, alpha=1.0, fit_intercept=False, normalize=False): - super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept, - normalize=normalize) + def __init__(self, alpha=1.0, fit_intercept=False, normalize=False, + max_iter=1000, tol=1e-4): + super(Lasso, self).__init__( + alpha=alpha, rho=1.0, fit_intercept=fit_intercept, + normalize=normalize, max_iter=max_iter, tol=tol) diff --git a/scikits/learn/linear_model/tests/test_least_angle.py b/scikits/learn/linear_model/tests/test_least_angle.py index f65485e83f69e..ceadcb33cf65c 100644 --- a/scikits/learn/linear_model/tests/test_least_angle.py +++ b/scikits/learn/linear_model/tests/test_least_angle.py @@ -105,7 +105,7 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8) - for (c, a) in zip(lasso_path.T, alphas): + for c, a in zip(lasso_path.T, alphas): lasso_cd.alpha = a lasso_cd.fit(X, y) error = np.linalg.norm(c - lasso_cd.coef_) @@ -114,21 +114,23 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): # similar test, with the classifiers for alpha in np.linspace(1e-2, 1 - 1e-2): clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y) - clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y) + clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8, + normalize=False).fit(X, y) err = np.linalg.norm(clf1.coef_ - clf2.coef_) assert err < 1e-3 - # same test, with normalization + # same test, with normalized data + X = diabetes.data alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') - lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True) - for (c, a) in zip(lasso_path.T, alphas): + lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True, + tol=1e-8) + for c, a in zip(lasso_path.T, alphas): lasso_cd.alpha = a - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(c - lasso_cd.coef_) assert error < 0.01 - def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): """ Test that LassoLars and Lasso using coordinate descent give the @@ -150,9 +152,10 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): for alphas_min in alphas_min: alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', alpha_min=0.9) - lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True) + lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True, + tol=1e-8) lasso_cd.alpha = alphas[-1] - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) assert error < 0.01 From 0e540d989476047662d38774278053dece8e98fa Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Tue, 23 Aug 2011 17:59:18 -0400 Subject: [PATCH 45/51] ENH : more standard import of scipy.sparse --- scikits/learn/linear_model/base.py | 4 ++-- .../linear_model/sparse/coordinate_descent.py | 10 +++++----- scikits/learn/linear_model/sparse/logistic.py | 4 ++-- .../sparse/stochastic_gradient.py | 20 +++++++++---------- .../sparse/tests/test_coordinate_descent.py | 10 +++++----- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index 2c42563b8cde2..ddc5cd0a37e4f 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -12,7 +12,7 @@ # License: BSD Style. 
import numpy as np -import scipy.sparse +import scipy.sparse as sp from ..base import BaseEstimator, RegressorMixin, ClassifierMixin from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber @@ -59,7 +59,7 @@ def _center_data(X, y, fit_intercept, normalize=False): You can specify an argument overwrite_X (default is False). """ if fit_intercept: - if scipy.sparse.issparse(X): + if sp.issparse(X): X_mean = np.zeros(X.shape[1]) X_std = np.ones(X.shape[1]) else: diff --git a/scikits/learn/linear_model/sparse/coordinate_descent.py b/scikits/learn/linear_model/sparse/coordinate_descent.py index f8773481d5fd0..80e150d54827d 100644 --- a/scikits/learn/linear_model/sparse/coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/coordinate_descent.py @@ -7,7 +7,7 @@ import warnings import numpy as np -from scipy import sparse +import scipy.sparse as sp from ..base import LinearModel from . import cd_fast_sparse @@ -59,7 +59,7 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) def fit(self, X, y): """Fit current model with coordinate descent @@ -67,7 +67,7 @@ def fit(self, X, y): X is expected to be a sparse matrix. For maximum efficiency, use a sparse matrix in CSC format (scipy.sparse.csc_matrix) """ - X = sparse.csc_matrix(X) + X = sp.csc_matrix(X) y = np.asanyarray(y, dtype=np.float64) # NOTE: we are explicitly not centering the data the naive way to @@ -111,8 +111,8 @@ def predict(self, X): array, shape = [n_samples] with the predicted real values """ # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) return np.ravel(np.dot(self.sparse_coef_, X.T).todense() + self.intercept_) diff --git a/scikits/learn/linear_model/sparse/logistic.py b/scikits/learn/linear_model/sparse/logistic.py index 7b41d2d6cf73e..6f01d7caca9bb 100644 --- a/scikits/learn/linear_model/sparse/logistic.py +++ b/scikits/learn/linear_model/sparse/logistic.py @@ -6,6 +6,7 @@ """ import numpy as np +import scipy.sparse as sp from ...base import ClassifierMixin from ...svm.sparse.base import SparseBaseLibLinear @@ -92,8 +93,7 @@ def predict_proba(self, X): The returned estimates for all classes are ordered by the label of classes. """ - import scipy.sparse - X = scipy.sparse.csr_matrix(X) + X = sp.csr_matrix(X) X.data = np.asanyarray(X.data, dtype=np.float64, order='C') probas = csr_predict_prob(X.shape[1], X.data, X.indices, X.indptr, self.raw_coef_, diff --git a/scikits/learn/linear_model/sparse/stochastic_gradient.py b/scikits/learn/linear_model/sparse/stochastic_gradient.py index fd3db80b2953e..10d8dccbeb605 100644 --- a/scikits/learn/linear_model/sparse/stochastic_gradient.py +++ b/scikits/learn/linear_model/sparse/stochastic_gradient.py @@ -4,7 +4,7 @@ """Implementation of Stochastic Gradient Descent (SGD) with sparse data.""" import numpy as np -from scipy import sparse +import scipy.sparse as sp from ...externals.joblib import Parallel, delayed from ..base import BaseSGDClassifier, BaseSGDRegressor @@ -128,13 +128,13 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) def _fit_binary(self, X, y): """Fit a binary classifier. 
""" # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # encode original class labels as 1 (classes[1]) or -1 (classes[0]). y_new = np.ones(y.shape, dtype=np.float64, order="C") * -1.0 @@ -174,7 +174,7 @@ def _fit_multiclass(self, X, y): all others (OVA: One Versus All). """ # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # get sparse matrix datastructures X_data = np.array(X.data, dtype=np.float64, order="C") @@ -217,8 +217,8 @@ def decision_function(self, X): The signed 'distances' to the hyperplane(s). """ # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) scores = np.asarray(np.dot(X, self.sparse_coef_.T).todense() + self.intercept_) if self.classes.shape[0] == 2: @@ -353,11 +353,11 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) def _fit_regressor(self, X, y): # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # get sparse matrix datastructures X_data = np.array(X.data, dtype=np.float64, order="C") @@ -400,8 +400,8 @@ def predict(self, X): Array containing the predicted class labels. """ # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) scores = np.asarray(np.dot(X, self.sparse_coef_.T).todense() + self.intercept_).ravel() return scores diff --git a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py index fa53a0ca8183a..90e5cee442698 100644 --- a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py @@ -1,5 +1,5 @@ import numpy as np -from scipy import sparse +import scipy.sparse as sp from numpy.testing import assert_array_almost_equal from numpy.testing import assert_almost_equal from numpy.testing import assert_equal @@ -12,7 +12,7 @@ def test_sparse_predict(): """Check that the predict method works with dense coef_ and sparse X""" - X = sparse.lil_matrix((3, 2)) + X = sp.lil_matrix((3, 2)) X[0, 0] = 1 X[0, 1] = 0.5 X[1, 0] = -1 @@ -25,7 +25,7 @@ def test_sparse_predict(): def test_lasso_zero(): """Check that the sparse lasso can handle zero data without crashing""" - X = sparse.csc_matrix((3, 1)) + X = sp.csc_matrix((3, 1)) y = [0, 0, 0] T = np.array([[1], [2], [3]]) clf = SparseLasso().fit(X, y) @@ -69,14 +69,14 @@ def test_enet_toy_explicit_sparse_input(): """Test ElasticNet for various parameters of alpha and rho with sparse X""" # training samples - X = sparse.lil_matrix((3, 1)) + X = sp.lil_matrix((3, 1)) X[0, 0] = -1 # X[1, 0] = 0 X[2, 0] = 1 Y = [-1, 0, 1] # just a straight line (the identity function) # test samples - T = sparse.lil_matrix((3, 1)) + T = sp.lil_matrix((3, 1)) T[0, 0] = 2 T[1, 0] = 3 T[2, 0] = 4 From 133f7ec836d3691022d74d584dacb9f610e4290f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 24 Aug 2011 10:56:30 +0200 Subject: [PATCH 46/51] ENH: add missing inverse_transform method for Scaler --- scikits/learn/preprocessing/__init__.py | 22 ++++++++++++++++++- .../preprocessing/tests/test_preprocessing.py | 21 ++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git 
a/scikits/learn/preprocessing/__init__.py b/scikits/learn/preprocessing/__init__.py index 323ae71c6eb21..7b5191b690416 100644 --- a/scikits/learn/preprocessing/__init__.py +++ b/scikits/learn/preprocessing/__init__.py @@ -178,13 +178,33 @@ def transform(self, X, y=None, copy=None): X = np.asanyarray(X) if copy: X = X.copy() - # We are taking a view of the X array and modifying it if self.with_mean: X -= self.mean_ if self.with_std: X /= self.std_ return X + def inverse_transform(self, X, copy=None): + """Scale back the data to the original representation + + Parameters + ---------- + X : array-like with shape [n_samples, n_features] + The data used to scale along the features axis. + """ + copy = copy if copy is not None else self.copy + if sp.issparse(X): + raise NotImplementedError( + "Scaling is not yet implement for sparse matrices") + X = np.asanyarray(X) + if copy: + X = X.copy() + if self.with_std: + X *= self.std_ + if self.with_mean: + X += self.mean_ + return X + def normalize(X, norm='l2', axis=1, copy=True): """Normalize a dataset along any axis diff --git a/scikits/learn/preprocessing/tests/test_preprocessing.py b/scikits/learn/preprocessing/tests/test_preprocessing.py index 6cba3d33cd285..446a1c63adf54 100644 --- a/scikits/learn/preprocessing/tests/test_preprocessing.py +++ b/scikits/learn/preprocessing/tests/test_preprocessing.py @@ -35,12 +35,17 @@ def test_scaler(): """Test scaling of dataset along all axis""" # First test with 1D data X = np.random.randn(5) + X_orig_copy = X.copy() scaler = Scaler() X_scaled = scaler.fit(X).transform(X, copy=False) assert_array_almost_equal(X_scaled.mean(axis=0), 0.0) assert_array_almost_equal(X_scaled.std(axis=0), 1.0) + # check inverse transform + X_scaled_back = scaler.inverse_transform(X_scaled) + assert_array_almost_equal(X_scaled_back, X_orig_copy) + # Test with 1D list X = [0., 1., 2, 0.4, 1.] 
scaler = Scaler() @@ -65,6 +70,12 @@ def test_scaler(): # Check that X has not been copied assert X_scaled is not X + # check inverse transform + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + X_scaled = scale(X, axis=1, with_std=False) assert not np.any(np.isnan(X_scaled)) assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0]) @@ -108,6 +119,11 @@ def test_scaler_without_centering(): # Check that X has not been copied assert X_scaled is not X + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + X_scaled = scale(X, with_mean=False) assert not np.any(np.isnan(X_scaled)) @@ -117,6 +133,11 @@ def test_scaler_without_centering(): # Check that X has not been copied assert X_scaled is not X + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + def test_normalizer_l1(): rng = np.random.RandomState(0) From ea36a26d6781d951d141527deeda1eb5c2011854 Mon Sep 17 00:00:00 2001 From: Jean Kossaifi Date: Wed, 24 Aug 2011 11:05:11 +0200 Subject: [PATCH 47/51] FIX : ELLIPSIS in least_angle.py doctests --- scikits/learn/linear_model/least_angle.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index e10a38182b8f1..2a27c1e69f29a 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -345,11 +345,11 @@ class Lars(LinearModel): -------- >>> from scikits.learn import linear_model >>> clf = linear_model.Lars(n_nonzero_coefs=1) - >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS + >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) # doctest: +ELLIPSIS Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) - >>> print clf.coef_ # doctest : +ELLIPSIS - [ 0. -1.00000015] + >>> print clf.coef_ # doctest: +ELLIPSIS + [ 0. -1.1111...] References ---------- @@ -485,8 +485,8 @@ class LassoLars(Lars): LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, overwrite_X=False) - >>> print clf.coef_ # doctest : +ELLIPSIS - [ 0. -0.96325779] + >>> print clf.coef_ # doctest: +ELLIPSIS + [ 0. -0.963257...] References ---------- @@ -892,12 +892,12 @@ class LassoLarsIC(LassoLars): -------- >>> from scikits.learn import linear_model >>> clf = linear_model.LassoLarsIC(criterion='bic') - >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS + >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) # doctest: +ELLIPSIS LassoLarsIC(normalize=True, verbose=False, fit_intercept=True, max_iter=500, eps=... precompute='auto', criterion='bic', overwrite_X=False) >>> print clf.coef_ # doctest: +ELLIPSIS - [ 0. -1.00000015] + [ 0. -1.1111...] 
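Stepping back briefly to the ``Scaler.inverse_transform`` method added in the
preprocessing change above: a minimal round-trip sketch on hypothetical data::

    import numpy as np
    from scikits.learn.preprocessing import Scaler

    X = np.array([[1., -1.,  2.],
                  [2.,  0.,  0.],
                  [0.,  1., -1.]])
    scaler = Scaler().fit(X)
    X_scaled = scaler.transform(X)       # zero mean, unit variance per column
    X_back = scaler.inverse_transform(X_scaled)
    print np.allclose(X_back, X)         # True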
References ---------- From 3a1d5e2f8fbcc22cf7c5dd440f1deca012f1533f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 24 Aug 2011 11:35:45 +0200 Subject: [PATCH 48/51] fix the VQ example by switching to floats in range 0 - 1 --- examples/cluster/vq_china.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py index 5c7c86498994a..50fee6a0cee46 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/vq_china.py @@ -10,35 +10,34 @@ print __doc__ import numpy as np import pylab as pl -from scikits.learn.preprocessing import Scaler from scikits.learn.cluster import KMeans from scikits.learn.datasets import load_sample_image from scikits.learn.utils import shuffle from time import time -# Get all sample images and obtain just china.jpg +# Load the Summer Palace photo china = load_sample_image("china.jpg") -# Convert to floats instead of the default 8 bits integer coding -china = np.array(china, dtype=np.float64) +# Convert to floats instead of the default 8 bits integer coding. Dividing by +# 255 is important so that pl.imshow behaves works well on foat data (need to be +# in the range [0-1] +china = np.array(china, dtype=np.float64) / 255 # Load Image and transform to a 2D numpy array. w, h, d = original_shape = tuple(china.shape) assert d == 3 image_array = np.reshape(china, (w * h, d)) -scaler = Scaler() -image_scaled = scaler.fit_transform(image_array) print "Fitting estimator on a small sub-sample of the data" t0 = time() -image_scaled_sample = shuffle(image_scaled, random_state=0)[:1000] -kmeans = KMeans(k=10, max_iter=1000).fit(image_scaled_sample) +image_array_sample = shuffle(image_array, random_state=0)[:1000] +kmeans = KMeans(k=10, random_state=0).fit(image_array_sample) print "done in %0.3fs." % (time() - t0) # Get labels for all points print "Predicting labels on the full image" t0 = time() -labels = kmeans.predict(image_scaled) +labels = kmeans.predict(image_array) print "done in %0.3fs." % (time() - t0) def recreate_image(codebook, labels, w, h): @@ -61,9 +60,5 @@ def recreate_image(codebook, labels, w, h): pl.figure() ax = pl.axes([0, 0, 1, 1], frameon=False) ax.set_axis_off() - -# transform back the images in the original space -codebook = scaler.inverse_transform(kmeans.cluster_centers_) - pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) pl.show() From 2a548ed1314d57d02d13d24d6f828b3b3a483bed Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 24 Aug 2011 11:41:52 +0200 Subject: [PATCH 49/51] cosmit --- examples/cluster/vq_china.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py index 50fee6a0cee46..1c19e10339da6 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/vq_china.py @@ -41,7 +41,7 @@ print "done in %0.3fs." 
% (time() - t0) def recreate_image(codebook, labels, w, h): - # Recreates the (compressed) image from the code book, labels and dimensions + """Recreate the (compressed) image from the code book & labels""" d = codebook.shape[1] image = np.zeros((w, h, d)) label_idx = 0 From 15872a66803d6f62ef17dfdf0ab1a6197ade0b43 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 24 Aug 2011 11:49:15 +0200 Subject: [PATCH 50/51] use the scipy public API rather than PIL --- scikits/learn/datasets/base.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py index eac1aea41f9e7..8cc60fd2184da 100644 --- a/scikits/learn/datasets/base.py +++ b/scikits/learn/datasets/base.py @@ -378,13 +378,13 @@ def load_sample_images(): >>> # pl.matshow(dataset.images[0]) # Visualize the first image >>> # pl.show() """ - # Try to import Image and imresize from PIL. We do this here to prevent + # Try to import imread from scipy. We do this lazily here to prevent # this module from depending on PIL. try: try: - from scipy.misc import Image + from scipy.misc import imread except ImportError: - from scipy.misc.pilutil import Image + from scipy.misc.pilutil import imread except ImportError: raise ImportError("The Python Imaging Library (PIL)" "is required to load data from jpeg files") @@ -394,8 +394,7 @@ def load_sample_images(): for filename in os.listdir(module_path) if filename.endswith(".jpg")] # Load image data for each image in the source folder. - images = [np.asarray(Image.open(filename)) - for filename in filenames] + images = [imread(filename) for filename in filenames] return Bunch(images=images, filenames=filenames, From c21d8d053f40b6954e71438fea2af44c9460ebb1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 24 Aug 2011 12:25:46 +0200 Subject: [PATCH 51/51] update the documentation --- doc/datasets/index.rst | 25 +++++++++++++++++++ .../cluster/{vq_china.py => plot_vq_china.py} | 24 ++++++++++-------- 2 files changed, 38 insertions(+), 11 deletions(-) rename examples/cluster/{vq_china.py => plot_vq_china.py} (79%) diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 8e2b3531db545..18c66b8975cbc 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -47,6 +47,31 @@ These datasets are useful to quickly illustrate the behavior of the various algorithms implemented in the scikit. They are however often too small to be representative of real world machine learning tasks. +Sample images +============= + +The scikit also embed a couple of sample JPEG images published under Creative +Commons license by their authors. Those image can be useful to test algorithms +and pipeline on 2D data. + +.. autosummary:: + + load_sample_images + load_sample_image + +.. note:: + + The default coding of images is based on the ``uint8`` dtype to + spare memory. Often machine learning algorithms work best if the + input is converted to a floating point representation first. Also, + if you plan to use ``pylab.imshow`` don't forget to scale to the range + 0 - 1 as done in the following example. + +.. 
topic:: Examples: + + * :ref:`example_cluster_plot_vq_china.py` + + Sample generators ================= diff --git a/examples/cluster/vq_china.py b/examples/cluster/plot_vq_china.py similarity index 79% rename from examples/cluster/vq_china.py rename to examples/cluster/plot_vq_china.py index 1c19e10339da6..8518f1b1e623b 100644 --- a/examples/cluster/vq_china.py +++ b/examples/cluster/plot_vq_china.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- """ -========================================= -Vector Quantization of Lena using k-means -========================================= +============================================ +Vector Quantization of a photo using k-means +============================================ -Performs a Vector Quantization of an image, reducing the number of colors -required to show the image. +Performs a pixel-wise Vector Quantization of an image, reducing the number of +colors required to show the image while preserving the overall appearance. """ print __doc__ import numpy as np @@ -52,13 +52,15 @@ def recreate_image(codebook, labels, w, h): return image # Display all results, alongside original image -pl.figure() -ax = pl.axes([0, 0, 1, 1], frameon=False) -ax.set_axis_off() +pl.figure(1) +pl.clf() +ax = pl.axes([0, 0, 1, 1]) +pl.axis('off') pl.imshow(china) -pl.figure() -ax = pl.axes([0, 0, 1, 1], frameon=False) -ax.set_axis_off() +pl.figure(2) +pl.clf() +ax = pl.axes([0, 0, 1, 1]) +pl.axis('off') pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) pl.show()
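As a closing illustration of the sample-image loading documented above, a
minimal hypothetical sketch (``flower.jpg`` ships with the scikit alongside
``china.jpg``; the division by 255 mirrors the ``pl.imshow`` note)::

    import numpy as np
    import pylab as pl
    from scikits.learn.datasets import load_sample_image

    flower = load_sample_image("flower.jpg")    # uint8, shape (427, 640, 3)
    flower = np.asarray(flower, dtype=np.float64) / 255   # rescale to [0, 1]

    pl.figure()
    pl.axis('off')
    pl.imshow(flower)
    pl.show()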