diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index f731e2041c2b8..a3d725e4bc62b 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -98,8 +98,8 @@ def compute_bench(samples_range, features_range): if __name__ == '__main__': - samples_range = np.linspace(1000, 3000, 5).astype(np.int) - features_range = np.linspace(1000, 3000, 5).astype(np.int) + samples_range = np.linspace(1000, 5000, 5).astype(np.int) + features_range = np.linspace(1000, 5000, 5).astype(np.int) results = compute_bench(samples_range, features_range) max_time = max(np.max(t) for t in results.itervalues()) @@ -107,7 +107,8 @@ def compute_bench(samples_range, features_range): fig = pl.figure() for i, (label, timings) in enumerate(sorted(results.iteritems())): ax = fig.add_subplot(1, 2, i) - pl.matshow(timings, fignum=False, cmap='OrRd') + vmax = max(1 - timings.min(), -1 + timings.max()) + pl.matshow(timings, fignum=False, vmin=1-vmax, vmax=1+vmax) ax.set_xticklabels([''] + map(str, samples_range)) ax.set_yticklabels([''] + map(str, features_range)) pl.xlabel('n_samples') diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 8e2b3531db545..18c66b8975cbc 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -47,6 +47,31 @@ These datasets are useful to quickly illustrate the behavior of the various algorithms implemented in the scikit. They are however often too small to be representative of real world machine learning tasks. +Sample images +============= + +The scikit also embeds a couple of sample JPEG images published under a Creative +Commons license by their authors. Those images can be useful to test algorithms +and pipelines on 2D data. + +.. autosummary:: + + load_sample_images + load_sample_image + +.. note:: + + The default coding of images is based on the ``uint8`` dtype to + spare memory. Often machine learning algorithms work best if the + input is converted to a floating point representation first. Also, + if you plan to use ``pylab.imshow`` don't forget to scale to the range + 0 - 1 as done in the following example. + +.. topic:: Examples: + + * :ref:`example_cluster_plot_vq_china.py` + + Sample generators ================= diff --git a/doc/developers/index.rst b/doc/developers/index.rst index 23dd7dfff623c..c2fb3caceb7ee 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -328,6 +328,11 @@ classifier or a regressor. All estimators implement the fit method:: estimator.fit(X, y) +All built-in estimators also have a ``set_params`` method, which sets +data-independent parameters (overriding previous parameter values passed +to ``__init__``). This method is not required for an object to be an +estimator. + Instantiation ^^^^^^^^^^^^^ diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index bfd4c440fa48d..76b856de140af 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -561,6 +561,7 @@ Manifold learning :template: class.rst manifold.LocallyLinearEmbedding + manifold.Isomap ..
autosummary:: diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 0479a35820a20..418aaf6771bfa 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -406,7 +406,7 @@ homogeneous but not complete:: Mathematical formulation ------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~ Homogeneity and completeness scores are formally given by: diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e718a52165d03..3fab5008aa43c 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -466,7 +466,7 @@ By default :math:`\alpha_1 = \alpha_2 = \lambda_1 = \lambda_2 = 1.e^{-6}`, *i.e >>> clf = linear_model.BayesianRidge() >>> clf.fit (X, Y) BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06, + fit_intercept=True, alpha_2=1e-06, tol=0.001, alpha_1=1e-06, compute_score=False) After being fitted, the model can then be used to predict new values:: diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 5949e420000d0..ecf22534a8133 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -20,52 +20,344 @@ Manifold learning +.. figure:: ../auto_examples/manifold/images/plot_compare_methods_1.png + :target: ../auto_examples/manifold/plot_compare_methods.html + :align: center + :scale: 60 + Manifold learning is an approach to nonlinear dimensionality reduction. Algorithms for this task are based on the idea that the dimensionality of many data sets is only artificially high. +Introduction +============ + +High-dimensional datasets can be very difficult to visualize. While data +in two or three dimensions can be plotted to show the inherent +structure of the data, equivalent high-dimensional plots are much less +intuitive. To aid visualization of the structure of a dataset, the +dimension must be reduced in some way. + +The simplest way to accomplish this dimensionality reduction is by taking +a random projection of the data. Though this allows some degree of +visualization of the data structure, the randomness of the choice leaves much +to be desired. In a random projection, it is likely that the more +interesting structure within the data will be lost. + + +.. |digits_img| image:: ../auto_examples/manifold/images/plot_lle_digits_1.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. |projected_img| image:: ../auto_examples/manifold/images/plot_lle_digits_2.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. centered:: |digits_img| |projected_img| + + +To address this concern, a number of supervised and unsupervised linear +dimensionality reduction frameworks have been designed, such as Principal +Component Analysis (PCA), Independent Component Analysis, Linear +Discriminant Analysis, and others. These algorithms define specific +rubrics to choose an "interesting" linear projection of the data. +These methods can be powerful, but often miss important nonlinear +structure in the data. + + +.. |PCA_img| image:: ../auto_examples/manifold/images/plot_lle_digits_3.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. |LDA_img| image:: ../auto_examples/manifold/images/plot_lle_digits_4.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :scale: 50 + +.. centered:: |PCA_img| |LDA_img| + +Manifold Learning can be thought of as an attempt to generalize linear +frameworks like PCA to be sensitive to nonlinear structure in data. 
Though +supervised variants exist, the typical manifold learning problem is +unsupervised: it learns the high-dimensional structure of the data +from the data itself, without the use of predetermined classifications. + + +.. topic:: Examples: + + * See :ref:`example_manifold_plot_lle_digits.py` for an example of + dimensionality reduction on handwritten digits. + + * See :ref:`example_manifold_plot_compare_methods.py` for an example of + dimensionality reduction on a toy "S-curve" dataset. + +The manifold learning implementations available in scikits-learn are +summarized below. + +Isomap +====== + +One of the earliest approaches to manifold learning is the Isomap +algorithm, short for Isometric Mapping. Isomap can be viewed as an +extension of Multi-dimensional Scaling (MDS) or Kernel PCA. +Isomap seeks a lower-dimensional embedding which maintains geodesic +distances between all points. Isomap can be performed with the object +:class:`Isomap`. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_5.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + +Complexity +---------- +The Isomap algorithm comprises three stages: + +1. **Nearest neighbor search.** Isomap uses + :class:`scikits.learn.neighbors.BallTree` for efficient neighbor search. + The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` + nearest neighbors of :math:`N` points in :math:`D` dimensions. + +2. **Shortest-path graph search.** The most efficient known algorithms + for this are *Dijkstra's Algorithm*, which is approximately + :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which + is :math:`O[N^3]`. The algorithm can be selected by the user with + the ``path_method`` keyword of ``Isomap``. If unspecified, the code + attempts to choose the best algorithm for the input data. + +3. **Partial eigenvalue decomposition.** The embedding is encoded in the + eigenvectors corresponding to the :math:`d` largest eigenvalues of the + :math:`N \times N` isomap kernel. For a dense solver, the cost is + approximately :math:`O[d N^2]`. This cost can often be improved using + the ``ARPACK`` solver. The eigensolver can be specified by the user + with the ``eigen_solver`` keyword of ``Isomap``. If unspecified, the + code attempts to choose the best algorithm for the input data. + +The overall complexity of Isomap is +:math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. + +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension + +.. topic:: References: + + * `"A global geometric framework for nonlinear dimensionality reduction" + `_ + Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. Science 290 (5500) + Locally Linear Embedding ------------------------- +======================== + +Locally linear embedding (LLE) seeks a lower-dimensional projection of the data +which preserves distances within local neighborhoods. It can be thought +of as a series of local Principal Component Analyses which are globally +compared to find the best nonlinear embedding. Locally linear embedding can be performed with function :func:`locally_linear_embedding` or its object-oriented counterpart :class:`LocallyLinearEmbedding`. -These take as input a set of points in a high-dimensional space and return -those points embedded in a manifold of dimension specified by parameter -``out_dim``. +..
figure:: ../auto_examples/manifold/images/plot_lle_digits_6.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + +Complexity +---------- + +The standard LLE algorithm comprises three stages: + +1. **Nearest Neighbors Search**. See discussion under Isomap above. + +2. **Weight Matrix Construction**. :math:`O[D N k^3]`. + The construction of the LLE weight matrix involves the solution of a + :math:`k \times k` linear equation for each of the :math:`N` local + neighborhoods -.. figure:: ../auto_examples/manifold/images/plot_lle_digits_4.png +3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. + +The overall complexity of standard LLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. + +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension + +.. topic:: References: + + * `"Nonlinear dimensionality reduction by locally linear embedding" + `_ + Roweis, S. & Saul, L. Science 290:2323 (2000) + + +Modified Locally Linear Embedding +================================= + +One well-known issue with LLE is the regularization problem. When the number +of neighbors is greater than the number of input dimensions, the matrix +defining each local neighborhood is rank-deficient. To address this, standard +LLE applies an arbitrary regularization parameter :math:`r`, which is chosen +relative to the trace of the local weight matrix. Though it can be shown +formally that as :math:`r \to 0`, the solution converges to the desired +embedding, there is no guarantee that the optimal solution will be found +for :math:`r > 0`. This problem manifests itself in embeddings which distort +the underlying geometry of the manifold. + +One method to address the regularization problem is to use multiple weight +vectors in each neighborhood. This is the essence of *modified locally +linear embedding* (MLLE). MLLE can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'modified'``. +It requires ``n_neighbors > out_dim``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_7.png :target: ../auto_examples/manifold/plot_lle_digits.html :align: center + :scale: 50 + +Complexity +---------- + The MLLE algorithm comprises three stages: -.. topic:: Examples: +1. **Nearest Neighbors Search**. Same as standard LLE - * See :ref:`example_manifold_plot_lle_digits.py` for an example of - dimensionality reduction on handwritten digits. +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent + to that of standard LLE. The second term has to do with constructing the + weight matrix from multiple weights. In practice, the added cost of + constructing the MLLE weight matrix is relatively small compared to the + cost of steps 1 and 3. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE + +The overall complexity of MLLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. + +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension + +.. topic:: References: + + * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" + `_ + Zhang, Z. & Wang, J. - * See :ref:`example_manifold_plot_swissroll.py` for an example of - locally linear embedding on the swiss roll.
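+
+For illustration only, a minimal MLLE usage sketch on the "S-curve" toy
+dataset (the parameter values are indicative rather than recommendations;
+the calls mirror those used in the examples referenced above)::
+
+    from scikits.learn import manifold, datasets
+
+    n_neighbors, out_dim = 10, 2
+    X, color = datasets.samples_generator.make_s_curve(1000)
+    mlle = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=out_dim,
+                                           method='modified')
+    X_mlle = mlle.fit_transform(X)  # array of shape (1000, 2)
+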
+Hessian Eigenmapping +==================== +Hessian Eigenmapping (also known as Hessian-based LLE: HLLE) is another method +of solving the regularization problem of LLE. It revolves around a +hessian-based quadratic form at each neighborhood which is used to recover +the locally linear structure. Though other implementations note its poor +scaling with data size, ``scikits-learn`` implements some algorithmic +improvements which make its cost comparable to that of other LLE variants +for small output dimension. HLLE can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'hessian'``. +It requires ``n_neighbors > out_dim * (out_dim + 3) / 2``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_8.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + Complexity ---------- -The complete algorithm scales using the `dense` eigensolver scales as -:math:`O(N log(N)) + O(D N K^3) + O(d N^2)`, where N is the number of samples, -D is the input dimension, d the output dimension and K the number of -neighbors. If the `lobcpg` solver is used, the last term can be reduced to -sub-quadratic in N. +The HLLE algorithm comprises three stages: + +1. **Nearest Neighbors Search**. Same as standard LLE + +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar + cost to that of standard LLE. The second term comes from a QR + decomposition of the local hessian estimator. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE + +The overall complexity of standard HLLE is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. + +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension + +.. topic:: References: + + * `"Hessian Eigenmaps: Locally linear embedding techniques for + high-dimensional data" `_ + Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) + + +Local Tangent Space Alignment +============================= + +Though not technically a variant of LLE, Local tangent space alignment (LTSA) +is algorithmically similar enough to LLE that it can be put in this category. +Rather than focusing on preserving neighborhood distances as in LLE, LTSA +seeks to characterize the local geometry at each neighborhood via its +tangent space, and performs a global optimization to align these local +tangent spaces to learn the embedding. LTSA can be performed with function +:func:`locally_linear_embedding` or its object-oriented counterpart +:class:`LocallyLinearEmbedding`, with the keyword ``method = 'ltsa'``. + +.. figure:: ../auto_examples/manifold/images/plot_lle_digits_9.png + :target: ../auto_examples/manifold/plot_lle_digits.html + :align: center + :scale: 50 + +Complexity +---------- + +The LTSA algorithm comprises three stages: + +1. **Nearest Neighbors Search**. Same as standard LLE + +2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar + cost to that of standard LLE. + +3. **Partial Eigenvalue Decomposition**. Same as standard LLE + +The overall complexity of standard LTSA is +:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`. + +* :math:`N` : number of training data points +* :math:`D` : input dimension +* :math:`k` : number of nearest neighbors +* :math:`d` : output dimension + +.. 
topic:: References: + + * `"Principal manifolds and nonlinear dimensionality reduction via + tangent space alignment" + `_ + Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004) Tips on practical use ---------------------- +===================== + +* Make sure the same scale is used over all features. Because manifold + learning methods are based on a nearest-neighbor search, the algorithm + may perform poorly otherwise. See :ref:`Scaler ` + for convenient ways of scaling heterogeneous data. -* Make sure the same scale is used over all features. Being this a - nearest-neighbors method it will behave poorly otherwise. +* The reconstruction error computed by each routine can be used to choose + the optimal output dimension. For a :math:`d`-dimensional manifold embedded + in a :math:`D`-dimensional parameter space, the reconstruction error will + decrease as ``out_dim`` is increased until ``out_dim == d``. -* On certain problems, the `lobcpg` solver might converge slowly. Supply a - generous value for `max_iter` if big oscillations are detected between runs. +* Note that noisy data can "short-circuit" the manifold, in essence acting + as a bridge between parts of the manifold that would otherwise be + well-separated. Manifold learning on noisy and/or incomplete data is + an active area of research. \ No newline at end of file diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 8079bf6f21040..443a276acc817 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -10,6 +10,7 @@ The ``scikits.learn.preprocessing`` package provides several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. +.. _preprocessing_scaler: Standardization or Mean Removal and Variance Scaling ==================================================== diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 8ae07f9c9b312..e810c25b2bfd4 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -68,7 +68,9 @@ # dependant from the gradient the segmentation is close to a voronoi graph.data = np.exp(-graph.data/graph.data.std()) -labels = spectral_clustering(graph, k=4) +# Force the solver to be arpack, since amg is numerically +# unstable on this example +labels = spectral_clustering(graph, k=4, mode='arpack') label_im = -np.ones(mask.shape) label_im[mask] = labels @@ -86,7 +88,7 @@ graph = image.img_to_graph(img, mask=mask) graph.data = np.exp(-graph.data/graph.data.std()) -labels = spectral_clustering(graph, k=2) +labels = spectral_clustering(graph, k=2, mode='arpack') label_im = -np.ones(mask.shape) label_im[mask] = labels diff --git a/examples/cluster/plot_vq_china.py b/examples/cluster/plot_vq_china.py new file mode 100644 index 0000000000000..8518f1b1e623b --- /dev/null +++ b/examples/cluster/plot_vq_china.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +""" +============================================ +Vector Quantization of a photo using k-means +============================================ + +Performs a pixel-wise Vector Quantization of an image, reducing the number of +colors required to show the image while preserving the overall appearance. 
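+
+As a rough, illustrative calculation (not performed by this script): the
+sample photo is a 427 x 640 RGB image, i.e. 427 * 640 * 3 = 819,840 uint8
+values, whereas the quantized result only needs the small codebook of
+cluster centers plus one cluster label per pixel (427 * 640 = 273,280
+labels).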
+""" +print __doc__ +import numpy as np +import pylab as pl +from scikits.learn.cluster import KMeans +from scikits.learn.datasets import load_sample_image +from scikits.learn.utils import shuffle +from time import time + +# Load the Summer Palace photo +china = load_sample_image("china.jpg") + +# Convert to floats instead of the default 8-bit integer coding. Dividing by +# 255 is important so that pl.imshow works well on float data (it needs to be +# in the range [0-1]). +china = np.array(china, dtype=np.float64) / 255 + +# Load Image and transform to a 2D numpy array. +w, h, d = original_shape = tuple(china.shape) +assert d == 3 +image_array = np.reshape(china, (w * h, d)) + +print "Fitting estimator on a small sub-sample of the data" +t0 = time() +image_array_sample = shuffle(image_array, random_state=0)[:1000] +kmeans = KMeans(k=10, random_state=0).fit(image_array_sample) +print "done in %0.3fs." % (time() - t0) + +# Get labels for all points +print "Predicting labels on the full image" +t0 = time() +labels = kmeans.predict(image_array) +print "done in %0.3fs." % (time() - t0) + +def recreate_image(codebook, labels, w, h): + """Recreate the (compressed) image from the code book & labels""" + d = codebook.shape[1] + image = np.zeros((w, h, d)) + label_idx = 0 + for i in range(w): + for j in range(h): + image[i][j] = codebook[labels[label_idx]] + label_idx += 1 + return image + +# Display all results, alongside original image +pl.figure(1) +pl.clf() +ax = pl.axes([0, 0, 1, 1]) +pl.axis('off') +pl.imshow(china) + +pl.figure(2) +pl.clf() +ax = pl.axes([0, 0, 1, 1]) +pl.axis('off') +pl.imshow(recreate_image(kmeans.cluster_centers_, labels, w, h)) +pl.show() diff --git a/examples/cluster/vq_china.py b/examples/cluster/vq_china.py deleted file mode 100644 index c0b048af1f932..0000000000000 --- a/examples/cluster/vq_china.py +++ /dev/null @@ -1,69 +0,0 @@ -# -*- coding: utf-8 -*- -""" -========================================= -Vector Quantization of Lena using k-means -========================================= - -Performs a Vector Quatization of an image, reducing the -number of colors required to show the image. -""" -print __doc__ -import os -import numpy as np -import pylab as pl -from scikits.learn.cluster import KMeans -from scikits.learn.datasets import load_sample_images - -# Get all sample images and obtain just china.jpg -sample_image_name = "china.jpg" -sample_images = load_sample_images() -index = None -for i, filename in enumerate(sample_images.filenames): - if filename.endswith(sample_image_name): - index = i - break -if index is None: - raise AttributeError("Cannot find sample image: %s" % sample_image_name) -image_data = sample_images.images[index] - -# Load Image and transform to a 2D numpy array. -w, h, d = original_shape = tuple(image_data.shape) -image_array = np.reshape(image_data, (w * h, 3)) - -# Take a sample of the data. -sample_indices = range(len(image_array)) -np.random.shuffle(sample_indices) -sample_indices = sample_indices[:int(len(image_array) * 0.2)] -sample_data = image_array[sample_indices] - -# Perform Vector Quantisation with 256 clusters. -k = 256 -kmeans = KMeans(k=k) -kmeans.fit(sample_data) -# Get labels for all points -labels = kmeans.predict(image_array) -# Save the reduced dataset. Only the centroids and labels need to be saved.
-reduced_image = (kmeans.cluster_centers_, labels) - -def recreate_image(centroids, labels, w, h): - # Recreates the (compressed) image from centroids, labels and dimensions - d = len(centroids[0]) - image = np.zeros((w, h, d)) - label_num = 0 - for i in range(w): - for j in range(h): - image[i][j] = centroids[labels[label_num]] - label_num += 1 - print np.histogram(labels) - print set(labels) - return image - -# Display all results, alongside original image -pl.figure() -ax = pl.axes([0,0,1,1], frameon=False) -ax.set_axis_off() -centroids, labels = reduced_image -im = pl.imshow(recreate_image(centroids, labels, w, h)) - -pl.show() - diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index e7944c63bd56c..d6416f51b259d 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -1,21 +1,10 @@ -r""" +""" ========================================= Comparison of Manifold Learning methods ========================================= An illustration of dimensionality reduction on the S-curve dataset -with various manifold learning methods. The methods are as follows: - -* LLE : Standard Locally Linear Embedding. - :func:`scikits.learn.manifold.locally_linear`, ``method = 'standard'`` -* LTSA : Local Tangent Space Alignment. - :func:`scikits.learn.manifold.locally_linear`, ``method = 'ltsa'`` -* Hessian LLE : Hessian Eigenmapping. - :func:`scikits.learn.manifold.locally_linear`, ``method = 'hessian`` -* Modified LLE : Modified Locally Linear Embedding with multiple weights. - :func:`scikits.learn.manifold.locally_linear`, ``method = 'modified'`` -* Isomap : Isometric Mapping. - :func:`scikits.learn.manifold.Isomap` +with various manifold learning methods. For a discussion and comparison of these algorithms, see the :ref:`manifold module page ` @@ -33,13 +22,11 @@ from scikits.learn import manifold, datasets -X, color = datasets.samples_generator.make_s_curve(1000) +n_points = 1000 +X, color = datasets.samples_generator.make_s_curve(n_points) n_neighbors = 10 out_dim = 2 -methods = ['standard', 'ltsa', 'hessian', 'modified'] -labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] - fig = pl.figure(figsize=(12, 8)) pl.suptitle("Manifold Learning with %i points, %i neighbors" % (1000, n_neighbors), fontsize=14) @@ -53,7 +40,8 @@ ax = fig.add_subplot(231, projection='3d') pl.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral) -ax.set_title('Original Data') +methods = ['standard', 'ltsa', 'hessian', 'modified'] +labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE'] for i, method in enumerate(methods): t0 = time() @@ -79,5 +67,6 @@ pl.title("Isomap (%.2g sec)" % (t1 - t0)) ax.xaxis.set_major_formatter(NullFormatter()) ax.yaxis.set_major_formatter(NullFormatter()) +pl.axis('tight') pl.show() diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 760bb9b534079..03148070d2d77 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -26,6 +26,7 @@ n_samples, n_features = X.shape n_neighbors = 30 + #---------------------------------------------------------------------- # Scale and visualize the embedding vectors def plot_embedding(X, title=None): @@ -57,6 +58,21 @@ def plot_embedding(X, title=None): pl.title(title) +#---------------------------------------------------------------------- +# Plot images of the digits +N = 20 +img = np.zeros((10 * N, 10 * N)) +for i in range(N): + ix = 10*i + 1 + for j in range(N): + iy = 10 * j + 1 + 
img[ix:ix + 8, iy:iy + 8] = X[i*N+j].reshape((8,8)) +pl.imshow(img, cmap=pl.cm.binary) +pl.xticks([]) +pl.yticks([]) +pl.title('A selection from the 64-dimensional digits dataset') + + #---------------------------------------------------------------------- # Random 2D projection using a random unitary matrix print "Computing random projection" @@ -89,6 +105,17 @@ def plot_embedding(X, title=None): (time() - t0)) +#---------------------------------------------------------------------- +# Isomap projection of the digits dataset +print "Computing Isomap embedding" +t0 = time() +X_iso = manifold.Isomap(n_neighbors, out_dim=2).fit_transform(X) +print "Done." +plot_embedding(X_iso, + "Isomap projection of the digits (time %.2fs)" % + (time() - t0)) + + #---------------------------------------------------------------------- # Locally linear embedding of the digits dataset print "Computing LLE embedding" @@ -115,19 +142,6 @@ def plot_embedding(X, title=None): (time() - t0)) -#---------------------------------------------------------------------- -# LTSA embedding of the digits dataset -print "Computing LTSA embedding" -clf = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=2, - method='ltsa') -t0 = time() -X_ltsa = clf.fit_transform(X) -print "Done. Reconstruction error: %g" % clf.reconstruction_error_ -plot_embedding(X_ltsa, - "Local Tangent Space Alignment of the digits (time %.2fs)" % - (time() - t0)) - - #---------------------------------------------------------------------- # HLLE embedding of the digits dataset print "Computing Hessian LLE embedding" @@ -142,15 +156,15 @@ def plot_embedding(X, title=None): #---------------------------------------------------------------------- -# Isomap projection of the digits dataset -print "Computing Isomap embedding" +# LTSA embedding of the digits dataset +print "Computing LTSA embedding" +clf = manifold.LocallyLinearEmbedding(n_neighbors, out_dim=2, + method='ltsa') t0 = time() -X_iso = manifold.Isomap(n_neighbors, out_dim=2).fit_transform(X) -print "Done." -plot_embedding(X_iso, - "Isomap projection of the digits (time %.2fs)" % +X_ltsa = clf.fit_transform(X) +print "Done. Reconstruction error: %g" % clf.reconstruction_error_ +plot_embedding(X_ltsa, + "Local Tangent Space Alignment of the digits (time %.2fs)" % (time() - t0)) - - pl.show() diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 6187afe801218..100cd0593f67c 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -40,7 +40,7 @@ percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100) for percentile in percentiles: - clf._set_params(anova__percentile=percentile) + clf.set_params(anova__percentile=percentile) # Compute cross-validation score using all CPUs this_scores = cross_val.cross_val_score(clf, X, y, n_jobs=1) score_means.append(this_scores.mean()) diff --git a/scikits/learn/base.py b/scikits/learn/base.py index 255b995240c75..944fa82209129 100644 --- a/scikits/learn/base.py +++ b/scikits/learn/base.py @@ -6,8 +6,10 @@ import inspect import numpy as np from scipy import sparse +import warnings from .metrics import r2_score +from .utils import deprecated ############################################################################### @@ -172,13 +174,17 @@ def _get_params(self, deep=True): out[key] = value return out - def _set_params(self, **params): + def set_params(self, **params): """ Set the parameters of the estimator. The method works on simple estimators as well as on nested objects (such as pipelines). 
The former have parameters of the - form __ so that the its possible to - update each component of the nested object. + form __ so that it's possible to update + each component of a nested object. + + Returns + ------- + self """ if not params: # Simple optimisation to gain speed (inspect is slow) @@ -198,7 +204,7 @@ def _set_params(self, **params): 'sub parameter %s' % (sub_name, self.__class__.__name__, sub_name) ) - sub_object._set_params(**{sub_name: value}) + sub_object.set_params(**{sub_name: value}) else: # simple objects case assert key in valid_params, ('Invalid parameter %s ' @@ -207,6 +213,13 @@ def _set_params(self, **params): setattr(self, key, value) return self + def _set_params(self, **params): + if params != {}: + warnings.warn("Passing estimator parameters to fit is deprecated;" + " use set_params instead", + category=DeprecationWarning) + return self.set_params(**params) + def __repr__(self): class_name = self.__class__.__name__ return '%s(%s)' % ( diff --git a/scikits/learn/cluster/affinity_propagation_.py b/scikits/learn/cluster/affinity_propagation_.py index 6db00c3cc07b1..a96d70d25eee9 100644 --- a/scikits/learn/cluster/affinity_propagation_.py +++ b/scikits/learn/cluster/affinity_propagation_.py @@ -210,7 +210,7 @@ def __init__(self, damping=.5, max_iter=200, convit=30, copy=True): self.convit = convit self.copy = copy - def fit(self, S, p=None, **params): + def fit(self, S, p=None): """compute MeanShift Parameters @@ -227,7 +227,6 @@ def fit(self, S, p=None, **params): algorithm, for memory efficiency """ - self._set_params(**params) self.cluster_centers_indices_, self.labels_ = affinity_propagation(S, p, max_iter=self.max_iter, convit=self.convit, damping=self.damping, diff --git a/scikits/learn/cluster/dbscan_.py b/scikits/learn/cluster/dbscan_.py index 7ba2c0cc680a3..00b68b2d63f2b 100644 --- a/scikits/learn/cluster/dbscan_.py +++ b/scikits/learn/cluster/dbscan_.py @@ -188,7 +188,7 @@ def fit(self, X, **params): Overwrite keywords from __init__. """ - self._set_params(**params) + self.set_params(**params) self.core_sample_indices_, self.labels_ = dbscan(X, **self._get_params()) self.components_ = X[self.core_sample_indices_].copy() diff --git a/scikits/learn/cluster/hierarchical.py b/scikits/learn/cluster/hierarchical.py index 5451fd49aff1f..ba98de6d262e8 100644 --- a/scikits/learn/cluster/hierarchical.py +++ b/scikits/learn/cluster/hierarchical.py @@ -292,7 +292,7 @@ def __init__(self, n_clusters=2, memory=Memory(cachedir=None, verbose=0), self.n_components = n_components self.connectivity = connectivity - def fit(self, X, **params): + def fit(self, X): """Fit the hierarchical clustering on the data Parameters @@ -304,8 +304,6 @@ def fit(self, X, **params): ------- self """ - self._set_params(**params) - memory = self.memory if isinstance(memory, basestring): memory = Memory(cachedir=memory) diff --git a/scikits/learn/cluster/k_means_.py b/scikits/learn/cluster/k_means_.py index f5056dbcfbcee..d709657caba5a 100644 --- a/scikits/learn/cluster/k_means_.py +++ b/scikits/learn/cluster/k_means_.py @@ -16,10 +16,11 @@ from ..base import BaseEstimator from ..metrics.pairwise import euclidean_distances -from ..utils import check_random_state from ..utils import check_arrays -from ..utils import shuffle +from ..utils import check_random_state from ..utils import gen_even_slices +from ..utils import shuffle +from ..utils import warn_if_not_float from . 
import _k_means @@ -229,7 +230,7 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0, for i in range(max_iter): centers_old = centers.copy() labels, inertia = _e_step(X, centers, - x_squared_norms=x_squared_norms) + x_squared_norms=x_squared_norms) centers = _m_step(X, labels, k) if verbose: @@ -483,25 +484,22 @@ def __init__(self, k=8, init='k-means++', n_init=10, max_iter=300, self.random_state = random_state self.copy_x = copy_x - def _check_data(self, X, **params): - """ - Set parameters and check the sample given is larger than k - """ + def _check_data(self, X): + """Verify that the number of samples given is larger than k""" if sp.issparse(X): raise ValueError("K-Means does not support sparse input matrices.") X = np.asanyarray(X) if X.shape[0] < self.k: - raise ValueError("n_samples=%d should be larger than k=%d" % ( + raise ValueError("n_samples=%d should be >= k=%d" % ( X.shape[0], self.k)) - self._set_params(**params) return X - def fit(self, X, **params): - """Fit k-means""" - + def fit(self, X, y=None): + """Compute k-means""" self.random_state = check_random_state(self.random_state) - X = self._check_data(X, **params) + X = self._check_data(X) + warn_if_not_float(X, self) self.cluster_centers_, self.labels_, self.inertia_ = k_means( X, k=self.k, init=self.init, n_init=self.n_init, @@ -509,7 +507,7 @@ def fit(self, X, **params): tol=self.tol, random_state=self.random_state, copy_x=self.copy_x) return self - def transform(self, X): + def transform(self, X, y=None): """ Transform the data to a cluster-distance space In the new space, each dimension is the distance to the cluster centers. @@ -537,7 +535,7 @@ def transform(self, X): return euclidean_distances(X, self.cluster_centers_) def predict(self, X): - """ Predict the closest cluster each sample belongs to + """Predict the closest cluster each sample in X belongs to. In the vector quantization literature, `cluster_centers_` is called the code book and each value returned by `predict` is the index of @@ -550,17 +548,17 @@ def predict(self, X): Returns ------- - Y : array, shape [n_samples, ] + Y : array, shape [n_samples,] Index of the closest center each sample belongs to. """ if not hasattr(self, "cluster_centers_"): raise AttributeError("Model has not been trained yet. " "Fit k-means before using predict.") - cluster_shape = self.cluster_centers_.shape[1] - if not X.shape[1] == cluster_shape: + expected_n_features = self.cluster_centers_.shape[1] + if not X.shape[1] == expected_n_features: raise ValueError("Incorrect number of features. " - "Got %d features, expected %d" % (X.shape[1], - cluster_shape)) + "Got %d features, expected %d" % ( + X.shape[1], expected_n_features)) return _e_step(X, self.cluster_centers_)[0] @@ -633,8 +631,7 @@ def _mini_batch_step_sparse(X, batch_slice, centers, counts, x_squared_norms): class MiniBatchKMeans(KMeans): - """ - Mini-Batch K-Means clustering + """Mini-Batch K-Means clustering Parameters ---------- @@ -702,18 +699,17 @@ def __init__(self, k=8, init='random', max_iter=100, self.cluster_centers_ = None self.chunk_size = chunk_size - def fit(self, X, y=None, **params): - """ - Calculates the centroids on a batch X + def fit(self, X, y=None): + """Compute the centroids on X by chunking it into mini-batches. 
Parameters ---------- X: array-like, shape = [n_samples, n_features] Coordinates of the data points to cluster """ - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = check_arrays(X, sparse_format="csr", copy=False)[0] + warn_if_not_float(X, self) n_samples, n_features = X.shape if n_samples < self.k: raise ValueError("Number of samples smaller than number "\ @@ -764,7 +760,7 @@ def fit(self, X, y=None, **params): return self - def partial_fit(self, X, y=None, **params): + def partial_fit(self, X, y=None): """Update k means estimate on a single mini-batch X. Parameters diff --git a/scikits/learn/cluster/mean_shift_.py b/scikits/learn/cluster/mean_shift_.py index 9f554487a795c..7e2edbd177d22 100644 --- a/scikits/learn/cluster/mean_shift_.py +++ b/scikits/learn/cluster/mean_shift_.py @@ -191,7 +191,7 @@ class MeanShift(BaseEstimator): def __init__(self, bandwidth=None): self.bandwidth = bandwidth - def fit(self, X, **params): + def fit(self, X): """ Compute MeanShift Parameters @@ -200,6 +200,5 @@ def fit(self, X, **params): Input points """ - self._set_params(**params) self.cluster_centers_, self.labels_ = mean_shift(X, self.bandwidth) return self diff --git a/scikits/learn/cluster/spectral.py b/scikits/learn/cluster/spectral.py index 82a4a01ca992b..20b75e8da6753 100644 --- a/scikits/learn/cluster/spectral.py +++ b/scikits/learn/cluster/spectral.py @@ -252,7 +252,7 @@ def __init__(self, k=8, mode=None, random_state=None): self.mode = mode self.random_state = random_state - def fit(self, X, **params): + def fit(self, X): """Compute the spectral clustering from the affinity matrix Parameters @@ -281,7 +281,6 @@ def fit(self, X, **params): speeds up computation. """ self.random_state = check_random_state(self.random_state) - self._set_params(**params) self.labels_ = spectral_clustering(X, k=self.k, mode=self.mode, random_state=self.random_state) return self diff --git a/scikits/learn/cluster/tests/test_dbscan.py b/scikits/learn/cluster/tests/test_dbscan.py index 8b05d98f3cd9b..7c36e710e7cd2 100644 --- a/scikits/learn/cluster/tests/test_dbscan.py +++ b/scikits/learn/cluster/tests/test_dbscan.py @@ -55,9 +55,8 @@ def test_dbscan_feature(): n_clusters_1 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_1, n_clusters) - db = DBSCAN() - labels = db.fit(X, metric=metric, - eps=eps, min_samples=min_samples).labels_ + db = DBSCAN(metric=metric) + labels = db.fit(X, eps=eps, min_samples=min_samples).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_2, n_clusters) @@ -80,9 +79,8 @@ def test_dbscan_callable(): n_clusters_1 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_1, n_clusters) - db = DBSCAN() - labels = db.fit(X, metric=metric, - eps=eps, min_samples=min_samples).labels_ + db = DBSCAN(metric=metric) + labels = db.fit(X, eps=eps, min_samples=min_samples).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) assert_equal(n_clusters_2, n_clusters) diff --git a/scikits/learn/cluster/tests/test_k_means.py b/scikits/learn/cluster/tests/test_k_means.py index e17c739340080..e7ba15410017e 100644 --- a/scikits/learn/cluster/tests/test_k_means.py +++ b/scikits/learn/cluster/tests/test_k_means.py @@ -14,12 +14,12 @@ n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters, std=.1) -S = sp.csr_matrix(X) +X_csr = sp.csr_matrix(X) def test_k_means_pp_init(): np.random.seed(1) - k_means = KMeans(init="k-means++").fit(X, k=n_clusters) + k_means = KMeans(init="k-means++", k=n_clusters).fit(X) centers 
= k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -31,7 +31,7 @@ def test_k_means_pp_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_mini_batch_k_means_pp_init(): @@ -49,20 +49,20 @@ def test_mini_batch_k_means_pp_init(): def test_sparse_mini_batch_k_means_pp_init(): np.random.seed(1) - sample = S[0:S.shape[0] / 2] + sample = X_csr[0:X_csr.shape[0] / 2] km = MiniBatchKMeans(init="random").partial_fit(sample) # Let's recalculate the inertia on the whole dataset - km.partial_fit(S) + km.partial_fit(X_csr) inertia = km.inertia_ - km.partial_fit(S[S.shape[0] / 2:]) + km.partial_fit(X_csr[X_csr.shape[0] / 2:]) # And again - km.partial_fit(S) + km.partial_fit(X_csr) assert(km.inertia_ < inertia) def test_k_means_pp_random_init(): np.random.seed(1) - k_means = KMeans(init="random").fit(X, k=n_clusters) + k_means = KMeans(init="random", k=n_clusters).fit(X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -74,13 +74,13 @@ def test_k_means_pp_random_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_k_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = KMeans(init=init_array, n_init=1).fit(X, k=n_clusters) + k_means = KMeans(init=init_array, n_init=1, k=n_clusters).fit(X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -92,20 +92,20 @@ def test_k_means_fixed_array_init(): assert_equal(np.unique(labels[40:]).size, 1) # check error on dataset being too small - assert_raises(ValueError, k_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, k_means.fit, [[0., 1.]]) def test_k_means_invalid_init(): np.random.seed(1) - k_means = KMeans(init="invalid", n_init=1) - assert_raises(ValueError, k_means.fit, X, k=n_clusters) + k_means = KMeans(init="invalid", n_init=1, k=n_clusters) + assert_raises(ValueError, k_means.fit, X) def test_k_means_copyx(): """Check if copy_x=False returns nearly equal X after de-centering.""" np.random.seed(1) my_X = X.copy() - k_means = KMeans(copy_x=False).fit(my_X, k=n_clusters) + k_means = KMeans(copy_x=False, k=n_clusters).fit(my_X) centers = k_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -124,7 +124,7 @@ def test_k_means_singleton(): np.random.seed(1) my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 0.9]]) array_init = np.array([[1.0, 1.0], [5.0, 5.0]]) - k_means = KMeans(init=array_init).fit(my_X, k=2) + k_means = KMeans(init=array_init, k=2).fit(my_X) # must be singleton clustering assert_equal(np.unique(k_means.labels_).size, 1) @@ -133,7 +133,7 @@ def test_k_means_singleton(): def test_mbk_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=init_array).fit(X) + mbk_means = MiniBatchKMeans(init=init_array, k=n_clusters).fit(X) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -141,13 +141,13 @@ def test_mbk_means_fixed_array_init(): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def 
test_sparse_mbk_means_fixed_array_init(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=init_array).fit(S) + mbk_means = MiniBatchKMeans(init=init_array).fit(X_csr) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -155,13 +155,13 @@ def test_sparse_mbk_means_fixed_array_init(): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def test_sparse_mbk_means_pp_init(): np.random.seed(1) - mbk_means = MiniBatchKMeans(init="k-means++") - assert_raises(ValueError, mbk_means.fit, S, k=n_clusters) + mbk_means = MiniBatchKMeans(init="k-means++", k=n_clusters) + assert_raises(ValueError, mbk_means.fit, X_csr) def test_sparse_mbk_means_callable_init(): @@ -169,7 +169,7 @@ def test_sparse_mbk_means_callable_init(): def test_init(Xbar, k, random_state): return np.vstack([X[5], X[25], X[45]]) - mbk_means = MiniBatchKMeans(init=test_init).fit(S) + mbk_means = MiniBatchKMeans(init=test_init).fit(X_csr) centers = mbk_means.cluster_centers_ assert_equal(centers.shape, (n_clusters, 2)) @@ -177,17 +177,18 @@ def test_init(Xbar, k, random_state): labels = mbk_means.labels_ assert_equal(np.unique(labels).size, 3) - assert_raises(ValueError, mbk_means.fit, [[0., 1.]], k=n_clusters) + assert_raises(ValueError, mbk_means.fit, [[0., 1.]]) def test_k_means_fixed_array_init_fit(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = KMeans(init=init_array, n_init=1).fit(X, k=n_clusters) + k_means = KMeans(init=init_array, n_init=1, k=n_clusters).fit(X) another_init_array = np.vstack([X[1], X[30], X[50]]) - other_k_means = KMeans(init=init_array, n_init=1) - other_k_means.fit(X, init=another_init_array, k=n_clusters) + other_k_means = KMeans(init=init_array, n_init=1, k=n_clusters) + other_k_means.set_params(init=another_init_array) + other_k_means.fit(X) assert_true(not np.allclose(k_means.init, other_k_means.init), "init attributes must be different") @@ -195,11 +196,11 @@ def test_k_means_fixed_array_init_fit(): def test_mbkm_fixed_array_init_fit(): np.random.seed(1) init_array = np.vstack([X[5], X[25], X[45]]) - k_means = MiniBatchKMeans(init=init_array).fit(X, k=n_clusters) + k_means = MiniBatchKMeans(init=init_array, k=n_clusters).fit(X) another_init_array = np.vstack([X[1], X[30], X[50]]) - other_k_means = MiniBatchKMeans(init=init_array) - other_k_means.fit(X, init=another_init_array, k=n_clusters) + other_k_means = MiniBatchKMeans(init=another_init_array, k=n_clusters) + other_k_means.fit(X) assert_true(not np.allclose(k_means.init, other_k_means.init), "init attributes must be different") @@ -209,12 +210,13 @@ def test_mbk_means(): true_labels = np.zeros((n_samples,), dtype=np.int32) true_labels[20:40] = 1 true_labels[40:] = 2 - chunk_size = n_samples * 2 + chunk_size = n_samples / 10 # make sure init clusters are in different clusters init_array = np.vstack([X[5], X[25], X[45]]) # shuffle original data - X_shuffled, S_shuffled, true_labels = shuffle(X, S, true_labels, random_state=1) + X_shuffled, X_csr_shuffled, true_labels = shuffle(X, X_csr, true_labels, + random_state=1) mbk_means = MiniBatchKMeans(init=init_array, chunk_size=chunk_size, k=n_clusters, random_state=1) @@ -223,16 +225,50 @@ def test_mbk_means(): mbk_means = MiniBatchKMeans(init=init_array, chunk_size=chunk_size, k=n_clusters, random_state=1) - mbk_means.fit(S_shuffled) + 
mbk_means.fit(X_csr_shuffled) assert_equal(true_labels, mbk_means.labels_) def test_predict(): - k_means = KMeans(k=n_clusters) - k_means.fit(X) + k_means = KMeans(k=n_clusters).fit(X) + + # sanity check: predict centroid labels pred = k_means.predict(k_means.cluster_centers_) assert_array_equal(pred, np.arange(n_clusters)) + # sanity check: re-predict labeling for training set samples + pred = k_means.predict(X) + assert_array_equal(k_means.predict(X), k_means.labels_) + + +def test_predict_minibatch(): + mbk_means = MiniBatchKMeans(k=n_clusters).fit(X) + + # sanity check: predict centroid labels + pred = mbk_means.predict(mbk_means.cluster_centers_) + assert_array_equal(pred, np.arange(n_clusters)) + + # sanity check: re-predict labeling for training set samples + pred = mbk_means.predict(X) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + + +def test_predict_minibatch_sparse_input(): + mbk_means = MiniBatchKMeans(k=n_clusters).fit(X_csr) + + # sanity check: predict centroid labels + pred = mbk_means.predict(mbk_means.cluster_centers_) + assert_array_equal(pred, np.arange(n_clusters)) + + # sanity check: re-predict labeling for training set samples + pred = mbk_means.predict(X_csr) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + + # check that models trained on sparse input also works for dense input at + # predict time + pred = mbk_means.predict(X) + assert_array_equal(mbk_means.predict(X), mbk_means.labels_) + def test_transform(): k_means = KMeans(k=n_clusters) diff --git a/scikits/learn/cluster/tests/test_spectral.py b/scikits/learn/cluster/tests/test_spectral.py index 3846f53ad293b..a2663160c760d 100644 --- a/scikits/learn/cluster/tests/test_spectral.py +++ b/scikits/learn/cluster/tests/test_spectral.py @@ -21,7 +21,7 @@ def test_spectral_clustering(): ]) for mat in (S, sparse.csr_matrix(S)): - model = SpectralClustering(random_state=0).fit(mat, k=2) + model = SpectralClustering(random_state=0, k=2).fit(mat) labels = model.labels_ if labels[0] == 0: labels = 1 - labels diff --git a/scikits/learn/covariance/empirical_covariance_.py b/scikits/learn/covariance/empirical_covariance_.py index 80860ef56150d..ff488a9ce85fb 100644 --- a/scikits/learn/covariance/empirical_covariance_.py +++ b/scikits/learn/covariance/empirical_covariance_.py @@ -108,7 +108,7 @@ def _set_estimates(self, covariance): else: self.precision_ = None - def fit(self, X, assume_centered=False, **params): + def fit(self, X, assume_centered=False): """ Fits the Maximum Likelihood Estimator covariance model according to the given training data and parameters. @@ -130,7 +130,6 @@ def fit(self, X, assume_centered=False, **params): Returns self. """ - self._set_params(**params) covariance = empirical_covariance(X, assume_centered=assume_centered) self._set_estimates(covariance) diff --git a/scikits/learn/covariance/shrunk_covariance_.py b/scikits/learn/covariance/shrunk_covariance_.py index 60f83ab8e5aa4..bd083663bd5c6 100644 --- a/scikits/learn/covariance/shrunk_covariance_.py +++ b/scikits/learn/covariance/shrunk_covariance_.py @@ -99,7 +99,7 @@ def __init__(self, store_precision=True, shrinkage=0.1): self.store_precision = store_precision self.shrinkage = shrinkage - def fit(self, X, assume_centered=False, **params): + def fit(self, X, assume_centered=False): """ Fits the shrunk covariance model according to the given training data and parameters. @@ -121,7 +121,6 @@ def fit(self, X, assume_centered=False, **params): Returns self. 
""" - self._set_params(**params) empirical_cov = empirical_covariance(X, assume_centered=assume_centered) covariance = shrunk_covariance(empirical_cov, self.shrinkage) diff --git a/scikits/learn/datasets/__init__.py b/scikits/learn/datasets/__init__.py index 9d06df2a0c9ba..4fec421c4ea9d 100644 --- a/scikits/learn/datasets/__init__.py +++ b/scikits/learn/datasets/__init__.py @@ -7,6 +7,7 @@ from .base import get_data_home from .base import clear_data_home from .base import load_sample_images +from .base import load_sample_image from .mlcomp import load_mlcomp from .lfw import load_lfw_pairs from .lfw import load_lfw_people diff --git a/scikits/learn/datasets/base.py b/scikits/learn/datasets/base.py index 6077374cc0917..8cc60fd2184da 100644 --- a/scikits/learn/datasets/base.py +++ b/scikits/learn/datasets/base.py @@ -10,7 +10,6 @@ import os import csv import shutil -import textwrap from os import environ from os.path import dirname from os.path import join @@ -348,9 +347,8 @@ def load_boston(): feature_names=feature_names, DESCR=fdescr.read()) - def load_sample_images(): - """ Load sample images for image manipulation. + """Load sample images for image manipulation. Return ------ @@ -366,20 +364,27 @@ def load_sample_images(): To load the data and visualize the images:: >>> from scikits.learn.datasets import load_sample_images - >>> images = load_sample_images() + >>> dataset = load_sample_images() + >>> len(dataset.images) + 2 + >>> first_img_data = dataset.images[0] + >>> first_img_data.shape # height, width, channels + (427, 640, 3) + >>> first_img_data.dtype + dtype('uint8') >>> # import pylab as pl >>> # pl.gray() - >>> # pl.matshow(images.images[0]) # Visualize the first image + >>> # pl.matshow(dataset.images[0]) # Visualize the first image >>> # pl.show() """ - # Try to import Image and imresize from PIL. We do this here to prevent + # Try to import imread from scipy. We do this lazily here to prevent # this module from depending on PIL. try: try: - from scipy.misc import Image + from scipy.misc import imread except ImportError: - from scipy.misc.pilutil import Image + from scipy.misc.pilutil import imread except ImportError: raise ImportError("The Python Imaging Library (PIL)" "is required to load data from jpeg files") @@ -389,9 +394,33 @@ def load_sample_images(): for filename in os.listdir(module_path) if filename.endswith(".jpg")] # Load image data for each image in the source folder. 
- images = [np.asarray(Image.open(filename)) - for filename in filenames] + images = [imread(filename) for filename in filenames] + return Bunch(images=images, filenames=filenames, DESCR=descr) - + +def load_sample_image(image_name): + """Load the numpy array of a single sample image + + >>> china = load_sample_image('china.jpg') + >>> china.dtype + dtype('uint8') + >>> china.shape + (427, 640, 3) + + >>> flower = load_sample_image('flower.jpg') + >>> flower.dtype + dtype('uint8') + >>> flower.shape + (427, 640, 3) + """ + images = load_sample_images() + index = None + for i, filename in enumerate(images.filenames): + if filename.endswith(image_name): + index = i + break + if index is None: + raise AttributeError("Cannot find sample image: %s" % image_name) + return images.images[index] diff --git a/scikits/learn/datasets/samples_generator.py b/scikits/learn/datasets/samples_generator.py index cdd4ea6693085..8539543ebb009 100644 --- a/scikits/learn/datasets/samples_generator.py +++ b/scikits/learn/datasets/samples_generator.py @@ -55,7 +55,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, weights : list of floats or None (default=None) The proportions of samples assigned to each class. If None, then - classes are balanced. + classes are balanced. Note that if `len(weights) == n_classes - 1`, + then the last class weight is automatically inferred. flip_y : float, optional (default=0.01) The fraction of samples whose class are randomly exchanged. @@ -306,7 +307,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, bias=0.0, n_features=n_features, effective_rank=effective_rank, tail_strength=tail_strength, - seed=generator) + random_state=generator) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored @@ -352,7 +353,7 @@ def make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1.0, n_features : int, optional (default=2) The number of features for each sample. - centers : int or array of shape [n_centers, n_features], optinal (default=3) + centers : int or array of shape [n_centers, n_features], optional (default=3) The number of centers to generate, or the fixed center locations. cluster_std: float or sequence of floats, optional (default=1.0) diff --git a/scikits/learn/datasets/tests/test_samples_generator.py b/scikits/learn/datasets/tests/test_samples_generator.py index a083a17b2fd7c..50e5202ff0fbd 100644 --- a/scikits/learn/datasets/tests/test_samples_generator.py +++ b/scikits/learn/datasets/tests/test_samples_generator.py @@ -1,5 +1,5 @@ import numpy as np -from numpy.testing import assert_equal, assert_almost_equal, \ +from numpy.testing import assert_equal, assert_approx_equal, \ assert_array_almost_equal, assert_array_less from .. 
import make_classification @@ -18,8 +18,10 @@ def test_make_classification(): X, y = make_classification(n_samples=100, n_features=20, n_informative=5, - n_classes=3, n_clusters_per_class=1, - weights=[0.1, 0.25, 0.65], random_state=0) + n_redundant=1, n_repeated=1, n_classes=3, + n_clusters_per_class=1, hypercube=False, + shift=None, scale=None, weights=[0.1, 0.25], + random_state=0) assert_equal(X.shape, (100, 20), "X shape mismatch") assert_equal(y.shape, (100,), "y shape mismatch") @@ -30,20 +32,25 @@ def test_make_classification(): def test_make_regression(): - X, y, c = make_regression(n_samples=50, n_features=10, n_informative=3, - coef=True, bias=0.0, random_state=0) + X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3, + effective_rank=5, coef=True, bias=0.0, + noise=1.0, random_state=0) - assert_equal(X.shape, (50, 10), "X shape mismatch") - assert_equal(y.shape, (50,), "y shape mismatch") + assert_equal(X.shape, (100, 10), "X shape mismatch") + assert_equal(y.shape, (100,), "y shape mismatch") assert_equal(c.shape, (10,), "coef shape mismatch") assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features") - assert_array_almost_equal(y, np.dot(X, c)) + + # Test that y ~= np.dot(X, c) + bias + N(0, 1.0) + assert_approx_equal(np.std(y - np.dot(X, c)), 1.0, significant=2) def test_make_blobs(): - X, y = make_blobs(n_samples=50, n_features=5, centers=3, random_state=0) + X, y = make_blobs(n_samples=50, n_features=2, + centers=[[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]], + random_state=0) - assert_equal(X.shape, (50, 5), "X shape mismatch") + assert_equal(X.shape, (50, 2), "X shape mismatch") assert_equal(y.shape, (50,), "y shape mismatch") assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs") diff --git a/scikits/learn/decomposition/fastica_.py b/scikits/learn/decomposition/fastica_.py index 2c13cd33dbc92..cb8746cca14bb 100644 --- a/scikits/learn/decomposition/fastica_.py +++ b/scikits/learn/decomposition/fastica_.py @@ -356,8 +356,7 @@ def __init__(self, n_components=None, algorithm='parallel', whiten=True, self.tol = tol self.w_init = w_init - def fit(self, X, **params): - self._set_params(**params) + def fit(self, X): whitening_, unmixing_, sources_ = fastica(X, self.n_components, self.algorithm, self.whiten, self.fun, self.fun_prime, self.fun_args, self.max_iter, diff --git a/scikits/learn/decomposition/kernel_pca.py b/scikits/learn/decomposition/kernel_pca.py index 3e4de77191bcc..de5f8ba7f7274 100644 --- a/scikits/learn/decomposition/kernel_pca.py +++ b/scikits/learn/decomposition/kernel_pca.py @@ -174,7 +174,7 @@ def _fit_inverse_transform(self, X_transformed, X): self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True) self.X_transformed_fit_ = X_transformed - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -188,7 +188,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. 
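
The samples_generator docstring above notes that when ``len(weights) == n_classes - 1`` the last class weight is inferred automatically. A short sketch of that behaviour, mirroring the updated test (the top-level import assumes the ``datasets`` package re-exports the generator, as the test's relative import suggests)::

    from scikits.learn.datasets import make_classification

    # Two weights for three classes: the third is inferred as 1 - 0.1 - 0.25 = 0.65.
    X, y = make_classification(n_samples=100, n_features=20, n_informative=5,
                               n_redundant=1, n_repeated=1, n_classes=3,
                               n_clusters_per_class=1, hypercube=False,
                               shift=None, scale=None, weights=[0.1, 0.25],
                               random_state=0)
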
""" - self._set_params(**params) self._fit_transform(X) if self.fit_inverse_transform: diff --git a/scikits/learn/decomposition/nmf.py b/scikits/learn/decomposition/nmf.py index 7ab69774ef69c..3db28b394263b 100644 --- a/scikits/learn/decomposition/nmf.py +++ b/scikits/learn/decomposition/nmf.py @@ -344,7 +344,7 @@ def __init__(self, n_components=None, init="nndsvdar", sparseness=None, self.max_iter = max_iter self.nls_max_iter = nls_max_iter - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Learn a NMF model for the data X and returns the transformed data. This is more efficient than calling fit followed by transform. @@ -360,7 +360,6 @@ def fit_transform(self, X, y=None, **params): data: array, [n_samples, n_components] Transformed data """ - self._set_params(**params) X = np.atleast_2d(X) if (X < 0).any(): raise ValueError("Negative data passed to NMF.fit.") diff --git a/scikits/learn/decomposition/pca.py b/scikits/learn/decomposition/pca.py index 81029e5e9c0ca..bd112fcde07d1 100644 --- a/scikits/learn/decomposition/pca.py +++ b/scikits/learn/decomposition/pca.py @@ -215,8 +215,7 @@ def fit_transform(self, X, y=None, **params): return U - def _fit(self, X, **params): - self._set_params(**params) + def _fit(self, X): X = np.atleast_2d(X) n_samples, n_features = X.shape if self.copy: @@ -426,7 +425,7 @@ def __init__(self, n_components, copy=True, iterated_power=3, self.whiten = whiten self.mean_ = None - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model to the data X. Parameters @@ -440,7 +439,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. """ - self._set_params(**params) if not hasattr(X, 'todense'): X = np.atleast_2d(X) diff --git a/scikits/learn/decomposition/sparse_pca.py b/scikits/learn/decomposition/sparse_pca.py index bad147cff0ad6..f6b02dc1d4ac7 100644 --- a/scikits/learn/decomposition/sparse_pca.py +++ b/scikits/learn/decomposition/sparse_pca.py @@ -77,13 +77,14 @@ def _update_code(dictionary, Y, alpha, code=None, Gram=None, method='lars', finally: np.seterr(**err_mgt) elif method == 'cd': - clf = Lasso(alpha=alpha, fit_intercept=False, precompute=Gram) + clf = Lasso(alpha=alpha, fit_intercept=False, precompute=Gram, + max_iter=1000, tol=tol) for k in range(n_features): # A huge amount of time is spent in this loop. It needs to be # tight. if code is not None: clf.coef_ = code[:, k] # Init with previous value of Vk - clf.fit(dictionary, Y[:, k], max_iter=1000, tol=tol) + clf.fit(dictionary, Y[:, k]) new_code[:, k] = clf.coef_ else: raise NotImplemented("Lasso method %s is not implemented." % method) @@ -601,7 +602,7 @@ def __init__(self, n_components, alpha=1, ridge_alpha=0.01, max_iter=1000, self.verbose = verbose self.random_state = random_state - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -615,7 +616,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. """ - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = np.asanyarray(X) code_init = self.V_init.T if self.V_init is not None else None @@ -726,7 +726,7 @@ def __init__(self, n_components, alpha=1, ridge_alpha=0.01, n_iter=100, self.method = method self.random_state = random_state - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Fit the model from data in X. Parameters @@ -740,7 +740,6 @@ def fit(self, X, y=None, **params): self : object Returns the instance itself. 
""" - self._set_params(**params) self.random_state = check_random_state(self.random_state) X = np.asanyarray(X) Vt, _ = dict_learning_online(X.T, self.n_components, alpha=self.alpha, diff --git a/scikits/learn/feature_extraction/image.py b/scikits/learn/feature_extraction/image.py index 56685530d9866..0a6525e116241 100644 --- a/scikits/learn/feature_extraction/image.py +++ b/scikits/learn/feature_extraction/image.py @@ -88,9 +88,6 @@ def _to_graph(n_x, n_y, n_z, mask=None, img=None, else: dtype = img.dtype - if dtype == np.bool: - dtype = np.int - if img is not None: img = np.atleast_3d(img) weights = _compute_gradient_3d(edges, img) @@ -247,7 +244,7 @@ def extract_patches_2d(image, patch_size, max_patches=None, random_state=None): if isinstance(max_patches, int) and max_patches < all_patches: n_patches = max_patches elif isinstance(max_patches, float) and 0 < max_patches < 1: - n_patches = max_patches * n_patches + n_patches = int(max_patches * all_patches) else: raise ValueError("Invalid value for max_patches: %r" % max_patches) diff --git a/scikits/learn/feature_extraction/tests/test_image.py b/scikits/learn/feature_extraction/tests/test_image.py index 5650b0b3d2f64..d5204ecdd9494 100644 --- a/scikits/learn/feature_extraction/tests/test_image.py +++ b/scikits/learn/feature_extraction/tests/test_image.py @@ -7,6 +7,7 @@ from scipy import ndimage from nose.tools import assert_equal +from numpy.testing import assert_raises from ..image import img_to_graph, grid_to_graph from ..image import extract_patches_2d, reconstruct_from_patches_2d, \ @@ -37,12 +38,21 @@ def test_grid_to_graph(): mask[-roi_size:, -roi_size:] = True mask = mask.reshape(size ** 2) A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray) + assert(cs_graph_components(A)[0] == 2) # Checking that the function works whatever the type of mask is mask = np.ones((size, size), dtype=np.int16) A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask) assert(cs_graph_components(A)[0] == 1) + # Checking dtype of the graph + mask = np.ones((size, size)) + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.bool) + assert A.dtype == np.bool + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.int) + assert A.dtype == np.int + A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.float) + assert A.dtype == np.float def test_connect_regions(): lena = sp.lena() @@ -133,6 +143,17 @@ def test_extract_patches_max_patches(): patches = extract_patches_2d(lena, (p_h, p_w), max_patches=100) assert_equal(patches.shape, (100, p_h, p_w)) + expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1)) + patches = extract_patches_2d(lena, (p_h, p_w), max_patches=0.5) + assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + + assert_raises(ValueError, extract_patches_2d, lena, + (p_h, p_w), + max_patches=2.0) + assert_raises(ValueError, extract_patches_2d, lena, + (p_h, p_w), + max_patches=-1.0) + def test_reconstruct_patches_perfect(): lena = downsampled_lena @@ -152,6 +173,12 @@ def test_reconstruct_patches_perfect_color(): np.testing.assert_array_equal(lena, lena_reconstructed) +def test_patch_extractor_fit(): + lenas = lena_collection + extr = PatchExtractor(patch_size=(8, 8), max_patches=100, random_state=0) + assert extr == extr.fit(lenas) + + def test_patch_extractor_max_patches(): lenas = lena_collection extr = PatchExtractor(patch_size=(8, 8), max_patches=100, random_state=0) diff --git a/scikits/learn/feature_extraction/tests/test_text.py 
b/scikits/learn/feature_extraction/tests/test_text.py index de8e16de08903..429c54b283f0e 100644 --- a/scikits/learn/feature_extraction/tests/test_text.py +++ b/scikits/learn/feature_extraction/tests/test_text.py @@ -16,8 +16,10 @@ assert_false, assert_not_equal from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal +from numpy.testing import assert_raises import pickle +from StringIO import StringIO JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", @@ -100,6 +102,11 @@ def test_word_analyzer_unigrams(): u'yesterday'] assert_equal(wa.analyze(text), expected) + text = StringIO("This is a test with a file-like object!") + expected = [u'this', u'is', u'test', u'with', u'file', u'like', + u'object'] + assert_equal(wa.analyze(text), expected) + def test_word_analyzer_unigrams_and_bigrams(): wa = WordNGramAnalyzer(min_n=1, max_n=2, stop_words=None) @@ -127,6 +134,10 @@ def test_char_ngram_analyzer(): expected = [u' yeste', u'yester', u'esterd', u'sterda', u'terday'] assert_equal(cnga.analyze(text)[-5:], expected) + text = StringIO("This is a test with a file-like object!") + expected = [u'thi', u'his', u'is ', u's i', u' is'] + assert_equal(cnga.analyze(text)[:5], expected) + def test_countvectorizer_custom_vocabulary(): what_we_like = ["pizza", "beer"] @@ -224,6 +235,10 @@ def test_vectorizer(): tfidf_test2 = toarray(tv.transform(test_data)) assert_array_almost_equal(tfidf_test, tfidf_test2) + # test empty vocabulary + v3 = CountVectorizer(vocabulary=None) + assert_raises(ValueError, v3.transform, train_data) + def test_vectorizer_max_features(): vec_factories = ( diff --git a/scikits/learn/feature_extraction/text.py b/scikits/learn/feature_extraction/text.py index 5872e446680f6..1f0d8fd3c0e24 100644 --- a/scikits/learn/feature_extraction/text.py +++ b/scikits/learn/feature_extraction/text.py @@ -148,9 +148,7 @@ def analyze(self, text_document): original_tokens = tokens tokens = [] n_original_tokens = len(original_tokens) - for n in xrange(self.min_n, self.max_n + 1): - if n_original_tokens < n: - continue + for n in xrange(self.min_n, min(self.max_n + 1, n_original_tokens + 1)): for i in xrange(n_original_tokens - n + 1): tokens.append(u" ".join(original_tokens[i: i + n])) @@ -196,9 +194,7 @@ def analyze(self, text_document): text_len = len(text_document) ngrams = [] - for n in xrange(self.min_n, self.max_n + 1): - if text_len < n: - continue + for n in xrange(self.min_n, min(self.max_n + 1, text_len + 1)): for i in xrange(text_len - n + 1): ngrams.append(text_document[i: i + n]) return ngrams diff --git a/scikits/learn/feature_selection/univariate_selection.py b/scikits/learn/feature_selection/univariate_selection.py index 534e15398a85c..3350795ea2f90 100644 --- a/scikits/learn/feature_selection/univariate_selection.py +++ b/scikits/learn/feature_selection/univariate_selection.py @@ -228,11 +228,10 @@ def __init__(self, score_func): "was passed." 
% (score_func, type(score_func))) self.score_func = score_func - def fit(self, X, y, **params): + def fit(self, X, y): """ Evaluate the function """ - self._set_params(**params) _scores = self.score_func(X, y) self._scores = _scores[0] self._pvalues = _scores[1] @@ -245,18 +244,16 @@ def get_support(self, indices=False): mask = self._get_support_mask() return mask if not indices else np.where(mask)[0] - def transform(self, X, **params): + def transform(self, X): """ Transform a new matrix using the selected features """ - self._set_params(**params) return safe_asanyarray(X)[:, self.get_support(indices=issparse(X))] - def inverse_transform(self, X, **params): + def inverse_transform(self, X): """ Transform a new matrix using the selected features """ - self._set_params(**params) support_ = self.get_support() if X.ndim == 1: X = X[None, :] @@ -446,9 +443,9 @@ def _get_support_mask(self): selector = self._selection_modes[self.mode](lambda x: x) selector._pvalues = self._pvalues selector._scores = self._scores - # Now make some acrobaties to set the right named parameter in + # Now perform some acrobatics to set the right named parameter in # the selector possible_params = selector._get_param_names() possible_params.remove('score_func') - selector._set_params(**{possible_params[0]: self.param}) + selector.set_params(**{possible_params[0]: self.param}) return selector._get_support_mask() diff --git a/scikits/learn/grid_search.py b/scikits/learn/grid_search.py index 9056996b762e4..9c45af853b103 100644 --- a/scikits/learn/grid_search.py +++ b/scikits/learn/grid_search.py @@ -67,9 +67,11 @@ def fit_grid_point(X, y, base_clf, clf_params, train, test, loss_func, msg = '%s' % (', '.join('%s=%s' % (k, v) for k, v in clf_params.iteritems())) print "[GridSearchCV] %s %s" % (msg, (64 - len(msg)) * '.') + # update parameters of the classifier after a copy of its base structure + # FIXME we should be doing a clone here clf = copy.deepcopy(base_clf) - clf._set_params(**clf_params) + clf.set_params(**clf_params) if isinstance(X, list) or isinstance(X, tuple): X_train = [X[i] for i, cond in enumerate(train) if cond] diff --git a/scikits/learn/lda.py b/scikits/learn/lda.py index 8d208164e62f2..54c5e19fa8cc9 100644 --- a/scikits/learn/lda.py +++ b/scikits/learn/lda.py @@ -66,7 +66,7 @@ def __init__(self, n_components=None, priors=None): print 'warning: the priors do not sum to 1. Renormalizing' self.priors = self.priors / self.priors.sum() - def fit(self, X, y, store_covariance=False, tol=1.0e-4, **params): + def fit(self, X, y, store_covariance=False, tol=1.0e-4): """ Fit the LDA model according to the given training data and parameters. @@ -81,7 +81,6 @@ def fit(self, X, y, store_covariance=False, tol=1.0e-4, **params): If True the covariance matrix (shared by all classes) is computed and stored in self.covariance_ attribute. """ - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) if y.dtype.char.lower() not in ('b', 'h', 'i'): diff --git a/scikits/learn/linear_model/base.py b/scikits/learn/linear_model/base.py index d71701b2c122f..ddc5cd0a37e4f 100644 --- a/scikits/learn/linear_model/base.py +++ b/scikits/learn/linear_model/base.py @@ -12,11 +12,13 @@ # License: BSD Style. 
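
The grid_search hunk above now pushes each candidate parameter set through the public ``set_params`` on a deep copy of the base estimator (a proper ``clone`` is left as a FIXME in the patch). A hypothetical sketch of that flow; ``SVC`` is only an illustrative choice of estimator, not part of the change::

    import copy
    import numpy as np
    from scikits.learn.svm import SVC   # any estimator exposing set_params would do

    X_train = np.random.randn(20, 4)
    y_train = np.array([0, 1] * 10)

    base_clf = SVC(kernel='linear')
    clf = copy.deepcopy(base_clf)       # per-grid-point copy, as in fit_grid_point
    clf.set_params(C=10.0)              # replaces the old clf._set_params(**clf_params)
    clf.fit(X_train, y_train)
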
import numpy as np +import scipy.sparse as sp from ..base import BaseEstimator, RegressorMixin, ClassifierMixin from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from ..utils.extmath import safe_sparse_dot from ..utils import safe_asanyarray +from ..utils import as_float_array ### @@ -47,30 +49,42 @@ def predict(self, X): return safe_sparse_dot(X, self.coef_.T) + self.intercept_ @staticmethod - def _center_data(X, y, fit_intercept): + def _center_data(X, y, fit_intercept, normalize=False): """ Centers data to have mean zero along axis 0. This is here because nearly all linear models will want their data to be centered. + + WARNING : This function modifies X inplace : + Use scikits.learn.utils.as_float_array before to convert X to np.float. + You can specify an argument overwrite_X (default is False). """ - import scipy.sparse # importing scipy.sparse just for this is overkill if fit_intercept: - if scipy.sparse.issparse(X): - Xmean = np.zeros(X.shape[1]) + if sp.issparse(X): + X_mean = np.zeros(X.shape[1]) + X_std = np.ones(X.shape[1]) else: - Xmean = X.mean(axis=0) - X = X - Xmean - ymean = y.mean() - y = y - ymean + X_mean = X.mean(axis=0) + X -= X_mean + if normalize: + X_std = np.sqrt(np.sum(X ** 2, axis=0)) + X_std[X_std==0] = 1 + X /= X_std + else: + X_std = np.ones(X.shape[1]) + y_mean = y.mean() + y = y - y_mean else: - Xmean = np.zeros(X.shape[1]) - ymean = 0. - return X, y, Xmean, ymean + X_mean = np.zeros(X.shape[1]) + X_std = np.ones(X.shape[1]) + y_mean = 0. + return X, y, X_mean, y_mean, X_std - def _set_intercept(self, Xmean, ymean): + def _set_intercept(self, X_mean, y_mean, X_std): """Set the intercept_ """ if self.fit_intercept: - self.intercept_ = ymean - np.dot(Xmean, self.coef_.T) + self.coef_ = self.coef_ / X_std + self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T) else: self.intercept_ = 0 @@ -94,10 +108,12 @@ class LinearRegression(LinearModel): """ - def __init__(self, fit_intercept=True): + def __init__(self, fit_intercept=True, normalize=False, overwrite_X=False): self.fit_intercept = fit_intercept + self.normalize = normalize + self.overwrite_X = overwrite_X - def fit(self, X, y, **params): + def fit(self, X, y): """ Fit linear model. @@ -111,21 +127,25 @@ def fit(self, X, y, **params): wether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized Returns ------- self : returns an instance of self. """ - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X = as_float_array(X, self.overwrite_X) + + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, self.normalize) self.coef_, self.residues_, self.rank_, self.singular_ = \ np.linalg.lstsq(X, y) - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self ## @@ -299,7 +319,7 @@ def _set_class_weight(self, class_weight, classes, y): self.class_weight = weight def fit(self, X, y, coef_init=None, intercept_init=None, - class_weight=None, sample_weight=None, **params): + class_weight=None, sample_weight=None): """Fit linear model with Stochastic Gradient Descent. Parameters @@ -330,7 +350,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, ------- self : returns an instance of self. 
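
The reworked ``_center_data``/``_set_intercept`` pair above centers ``X`` and ``y``, optionally scales each column of ``X`` by its norm, and rescales the fitted coefficients back afterwards. A standalone sketch of that arithmetic, using the patch's ``X_mean``, ``X_std`` and ``y_mean`` names::

    import numpy as np

    X = np.random.randn(30, 4)
    y = np.random.randn(30)

    X_mean = X.mean(axis=0)
    Xc = X - X_mean                              # centering
    X_std = np.sqrt(np.sum(Xc ** 2, axis=0))     # column norms, as with normalize=True
    X_std[X_std == 0] = 1
    Xn = Xc / X_std
    y_mean = y.mean()

    coef_scaled = np.linalg.lstsq(Xn, y - y_mean)[0]
    coef = coef_scaled / X_std                   # undo the column scaling
    intercept = y_mean - np.dot(X_mean, coef)    # as done in _set_intercept
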
""" - self._set_params(**params) # check only y because X might be dense or sparse y = np.asanyarray(y, dtype=np.float64, order='C') @@ -443,7 +462,7 @@ def _set_loss_function(self, loss): raise ValueError("The loss %s is not supported. " % loss) def fit(self, X, y, coef_init=None, intercept_init=None, - sample_weight=None, **params): + sample_weight=None): """Fit linear model with Stochastic Gradient Descent. Parameters @@ -467,7 +486,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, ------- self : returns an instance of self. """ - self._set_params(**params) y = np.asanyarray(y, dtype=np.float64, order="C") # make sure X has shape diff --git a/scikits/learn/linear_model/bayes.py b/scikits/learn/linear_model/bayes.py index 4a263801c19e3..c9b8111b85d60 100644 --- a/scikits/learn/linear_model/bayes.py +++ b/scikits/learn/linear_model/bayes.py @@ -10,6 +10,7 @@ from scipy import linalg from .base import LinearModel +from ..utils import as_float_array from ..utils.extmath import fast_logdet @@ -33,7 +34,7 @@ class BayesianRidge(LinearModel): n_iter : int, optional Maximum number of iterations. Default is 300. - eps : float, optional + tol : float, optional Stop the algorithm if w has converged. Default is 1.e-3. alpha_1 : float, optional @@ -64,6 +65,18 @@ class BayesianRidge(LinearModel): (e.g. data is expected to be already centered). Default is True. + normalize : boolean, optional + If True, the regressors X are normalized + Default is False + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + + verbose : boolean, optional + Verbose mode when fitting the model. Default is False. + + Attributes ---------- `coef_` : array, shape = (n_features) @@ -91,9 +104,9 @@ class BayesianRidge(LinearModel): >>> from scikits.learn import linear_model >>> clf = linear_model.BayesianRidge() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06, - compute_score=False) + BayesianRidge(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, + lambda_2=1e-06, fit_intercept=True, alpha_2=1e-06, tol=0.001, + alpha_1=1e-06, overwrite_X=False, compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -102,20 +115,23 @@ class BayesianRidge(LinearModel): See examples/linear_model/plot_bayesian_ridge.py for an example. """ - def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, + def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - fit_intercept=True, verbose=False): + fit_intercept=True, normalize=False, + overwrite_X=False, verbose=False): self.n_iter = n_iter - self.eps = eps + self.tol = tol self.alpha_1 = alpha_1 self.alpha_2 = alpha_2 self.lambda_1 = lambda_1 self.lambda_2 = lambda_2 self.compute_score = compute_score self.fit_intercept = fit_intercept + self.normalize = normalize + self.overwrite_X = overwrite_X self.verbose = verbose - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model Parameters @@ -129,10 +145,11 @@ def fit(self, X, y, **params): ------- self : returns an instance of self. 
""" - self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X = as_float_array(X, self.overwrite_X) + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, self.normalize) n_samples, n_features = X.shape ### Initialization of the values of the parameters @@ -196,7 +213,7 @@ def fit(self, X, y, **params): self.scores_.append(s) ### Check for convergence - if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.eps: + if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print "Convergence after ", str(iter_), " iterations" break @@ -206,7 +223,7 @@ def fit(self, X, y, **params): self.lambda_ = lambda_ self.coef_ = coef_ - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -234,7 +251,7 @@ class ARDRegression(LinearModel): n_iter : int, optional Maximum number of iterations. Default is 300 - eps : float, optional + tol : float, optional Stop the algorithm if w has converged. Default is 1.e-3. alpha_1 : float, optional @@ -267,6 +284,13 @@ class ARDRegression(LinearModel): (e.g. data is expected to be already centered). Default is True. + normalize : boolean, optional + If True, the regressors X are normalized + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + verbose : boolean, optional Verbose mode when fitting the model. Default is False. @@ -300,9 +324,10 @@ class ARDRegression(LinearModel): >>> from scikits.learn import linear_model >>> clf = linear_model.ARDRegression() >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - ARDRegression(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06, - fit_intercept=True, eps=0.001, threshold_lambda=10000.0, - alpha_2=1e-06, alpha_1=1e-06, compute_score=False) + ARDRegression(normalize=False, n_iter=300, verbose=False, lambda_1=1e-06, + lambda_2=1e-06, fit_intercept=True, threshold_lambda=10000.0, + alpha_2=1e-06, tol=0.001, alpha_1=1e-06, overwrite_X=False, + compute_score=False) >>> clf.predict([[1, 1]]) array([ 1.]) @@ -311,21 +336,24 @@ class ARDRegression(LinearModel): See examples/linear_model/plot_ard.py for an example. """ - def __init__(self, n_iter=300, eps=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, + def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - threshold_lambda=1.e+4, fit_intercept=True, verbose=False): + threshold_lambda=1.e+4, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False): self.n_iter = n_iter - self.eps = eps + self.tol = tol self.fit_intercept = fit_intercept + self.normalize = normalize self.alpha_1 = alpha_1 self.alpha_2 = alpha_2 self.lambda_1 = lambda_1 self.lambda_2 = lambda_2 self.compute_score = compute_score self.threshold_lambda = threshold_lambda + self.overwrite_X = overwrite_X self.verbose = verbose - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the ARDRegression model according to the given training data and parameters. @@ -343,7 +371,6 @@ def fit(self, X, y, **params): ------- self : returns an instance of self. 
""" - self._set_params(**params) X = np.asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) @@ -351,7 +378,10 @@ def fit(self, X, y, **params): n_samples, n_features = X.shape coef_ = np.zeros(n_features) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X = as_float_array(X, self.overwrite_X) + + X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept, + self.normalize) ### Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) @@ -407,7 +437,7 @@ def fit(self, X, y, **params): self.scores_.append(s) ### Check for convergence - if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.eps: + if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print "Converged after %s iterations" % iter_ break @@ -417,5 +447,5 @@ def fit(self, X, y, **params): self.alpha_ = alpha_ self.sigma_ = sigma_ - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/scikits/learn/linear_model/coordinate_descent.py b/scikits/learn/linear_model/coordinate_descent.py index 55af2b422acd1..f11498a61b17a 100644 --- a/scikits/learn/linear_model/coordinate_descent.py +++ b/scikits/learn/linear_model/coordinate_descent.py @@ -8,6 +8,7 @@ import numpy as np from .base import LinearModel +from ..utils import as_float_array from ..cross_val import check_cv from . import cd_fast @@ -30,13 +31,16 @@ class ElasticNet(LinearModel): rho : float The ElasticNet mixing parameter, with 0 < rho <= 1. For rho = 0 - the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. + the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. For 0 < rho < 1, the penalty is a combination of L1 and L2 fit_intercept: bool Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. + normalize : boolean, optional + If True, the regressors X are normalized + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -45,6 +49,10 @@ class ElasticNet(LinearModel): max_iter: int, optional The maximum number of iterations + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + tol: float, optional The tolerance for the optimization: if the updates are smaller than 'tol', the optimization code checks the @@ -68,22 +76,24 @@ class ElasticNet(LinearModel): a*L1 + b*L2 for:: - + alpha = a + b and rho = a/(a+b) """ - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4): + normalize=False, precompute='auto', max_iter=1000, + overwrite_X=False, tol=1e-4): self.alpha = alpha self.rho = rho self.coef_ = None self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter + self.overwrite_X = overwrite_X self.tol = tol - def fit(self, X, y, Xy=None, coef_init=None, **params): + def fit(self, X, y, Xy=None, coef_init=None): """Fit Elastic Net model with coordinate descent Parameters @@ -108,18 +118,27 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): To avoid memory re-allocation it is advised to allocate the initial data in memory directly using that format. 
""" - self._set_params(**params) X = np.asanyarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) + X = as_float_array(X, self.overwrite_X) + + n_samples, n_features = X.shape - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X_init = X + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) + precompute = self.precompute + if X_init is not X and hasattr(precompute, '__array__'): + precompute = 'auto' # recompute Gram + if X_init is not X and Xy is not None: + Xy = None # recompute Xy if coef_init is None: - self.coef_ = np.zeros(X.shape[1], dtype=np.float64) + self.coef_ = np.zeros(n_features, dtype=np.float64) else: self.coef_ = coef_init - n_samples = X.shape[0] alpha = self.alpha * self.rho * n_samples beta = self.alpha * (1.0 - self.rho) * n_samples @@ -127,9 +146,9 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): # precompute if n_samples > n_features if hasattr(self.precompute, '__array__'): - Gram = self.precompute - elif self.precompute == True or \ - (self.precompute == 'auto' and X.shape[0] > X.shape[1]): + Gram = precompute + elif precompute == True or \ + (precompute == 'auto' and n_samples > n_features): Gram = np.dot(X.T, X) else: Gram = None @@ -146,7 +165,7 @@ def fit(self, X, y, Xy=None, coef_init=None, **params): cd_fast.enet_coordinate_descent_gram(self.coef_, alpha, beta, Gram, Xy, y, self.max_iter, self.tol) - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) if self.dual_gap_ > self.eps_: warnings.warn('Objective did not converge, you might want' @@ -175,6 +194,13 @@ class Lasso(ElasticNet): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -203,8 +229,8 @@ class Lasso(ElasticNet): >>> from scikits.learn import linear_model >>> clf = linear_model.Lasso(alpha=0.1) >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2]) - Lasso(precompute='auto', alpha=0.1, max_iter=1000, tol=0.0001, - fit_intercept=True) + Lasso(normalize=False, fit_intercept=True, max_iter=1000, precompute='auto', + tol=0.0001, alpha=0.1, overwrite_X=False) >>> print clf.coef_ [ 0.85 0. ] >>> print clf.intercept_ @@ -222,19 +248,22 @@ class Lasso(ElasticNet): should be directly passed as a fortran contiguous numpy array. 
""" - def __init__(self, alpha=1.0, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4): + def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, + precompute='auto', overwrite_X=False, max_iter=1000, + tol=1e-4): super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept, - precompute=precompute, max_iter=max_iter, - tol=tol) + fit_intercept=fit_intercept, normalize=normalize, + precompute=precompute, overwrite_X=overwrite_X, + max_iter=max_iter, tol=tol) ############################################################################### # Classes to store linear models along a regularization path -def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - verbose=False, **fit_params): +def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, + precompute='auto', Xy=None, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False, + **params): """Compute Lasso path with coordinate descent Parameters @@ -257,8 +286,30 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, List of alphas where to compute the models. If None alphas are set automatically - fit_params : kwargs - keyword arguments passed to the Lasso fit method + precompute : True | False | 'auto' | array-like + Whether to use a precomputed Gram matrix to speed up + calculations. If set to 'auto' let us decide. The Gram + matrix can also be passed as argument. + + Xy : array-like, optional + Xy = np.dot(X.T, y) that can be precomputed. It is useful + only when the Gram matrix is precomuted. + + fit_intercept : bool + Fit or not an intercept + + normalize : boolean, optional + If True, the regressors X are normalized + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + + verbose : bool + Verbose computation or not. + + params : kwargs + keyword arguments passed to the Lasso objects Returns ------- @@ -272,11 +323,15 @@ def lasso_path(X, y, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, should be directly passed as a fortran contiguous numpy array. """ return enet_path(X, y, rho=1., eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, verbose=verbose, **fit_params) + precompute='auto', Xy=None, + fit_intercept=fit_intercept, normalize=normalize, + overwrite_X=overwrite_X, verbose=verbose, **params) def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, verbose=False, **fit_params): + precompute='auto', Xy=None, fit_intercept=True, + normalize=False, overwrite_X=False, verbose=False, + **params): """Compute Elastic-Net path with coordinate descent Parameters @@ -303,8 +358,30 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, List of alphas where to compute the models. If None alphas are set automatically - fit_params : kwargs - keyword arguments passed to the Lasso fit method + precompute : True | False | 'auto' | array-like + Whether to use a precomputed Gram matrix to speed up + calculations. If set to 'auto' let us decide. The Gram + matrix can also be passed as argument. + + Xy : array-like, optional + Xy = np.dot(X.T, y) that can be precomputed. It is useful + only when the Gram matrix is precomuted. + + fit_intercept : bool + Fit or not an intercept + + normalize : boolean, optional + If True, the regressors X are normalized + + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + + verbose : bool + Verbose computation or not. 
+ + params : kwargs + keyword arguments passed to the Lasso objects Returns ------- @@ -314,12 +391,30 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, ----- See examples/plot_lasso_coordinate_descent_path.py for an example. """ - X, y, Xmean, ymean = LinearModel._center_data(X, y, fit_intercept) + X = as_float_array(X, overwrite_X) + + X_init = X + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, + fit_intercept, + normalize) X = np.asfortranarray(X) # make data contiguous in memory + n_samples, n_features = X.shape + + if X_init is not X and hasattr(precompute, '__array__'): + precompute = 'auto' + if X_init is not X and Xy is not None: + Xy = None + + if 'precompute' is True or \ + ((precompute == 'auto') and (n_samples > n_features)): + precompute = np.dot(X.T, X) + + if Xy is None: + Xy = np.dot(X.T, y) n_samples = X.shape[0] if alphas is None: - alpha_max = np.abs(np.dot(X.T, y)).max() / (n_samples * rho) + alpha_max = np.abs(Xy).max() / (n_samples * rho) alphas = np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max), num=n_alphas)[::-1] else: @@ -327,19 +422,14 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, coef_ = None # init coef_ models = [] - if not 'precompute' in fit_params \ - or fit_params['precompute'] is True \ - or (fit_intercept and hasattr(fit_params['precompute'], '__array__')): - fit_params['precompute'] = np.dot(X.T, X) - if not 'Xy' in fit_params or fit_params['Xy'] is None: - fit_params['Xy'] = np.dot(X.T, y) - for alpha in alphas: - model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False) - model.fit(X, y, coef_init=coef_, **fit_params) + model = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False, + precompute=precompute) + model.set_params(**params) + model.fit(X, y, coef_init=coef_, Xy=Xy) if fit_intercept: model.fit_intercept = True - model._set_intercept(Xmean, ymean) + model._set_intercept(X_mean, y_mean, X_std) if verbose: print model coef_ = model.coef_.copy() @@ -350,19 +440,21 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, class LinearModelCV(LinearModel): """Base class for iterative model fitting along a regularization path""" - def __init__(self, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, precompute='auto', max_iter=1000, - tol=1e-4, cv=None): + def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, + normalize=False, precompute='auto', max_iter=1000, tol=1e-4, + overwrite_X=False, cv=None): self.eps = eps self.n_alphas = n_alphas self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter self.tol = tol + self.overwrite_X = overwrite_X self.cv = cv - def fit(self, X, y, **fit_params): + def fit(self, X, y): """Fit linear model with coordinate descent along decreasing alphas using cross-validation @@ -380,7 +472,6 @@ def fit(self, X, y, **fit_params): keyword arguments passed to the Lasso fit method """ - self._set_params(**fit_params) X = np.asfortranarray(X, dtype=np.float64) y = np.asanyarray(y, dtype=np.float64) @@ -454,7 +545,7 @@ class LassoCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). 
- Specific crossvalidation objects can be passed, see + Specific crossvalidation objects can be passed, see scikits.learn.cross_val module for the list of possible objects Notes @@ -465,7 +556,6 @@ class LassoCV(LinearModelCV): To avoid unnecessary memory duplication the X argument of the fit method should be directly passed as a fortran contiguous numpy array. """ - path = staticmethod(lasso_path) estimator = Lasso @@ -480,7 +570,7 @@ class ElasticNetCV(LinearModelCV): rho : float, optional float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties). For rho = 0 - the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. + the penalty is an L1 penalty. For rho = 1 it is an L2 penalty. For 0 < rho < 1, the penalty is a combination of L1 and L2 eps : float, optional @@ -510,7 +600,7 @@ class ElasticNetCV(LinearModelCV): cv : integer or crossvalidation generator, optional If an integer is passed, it is the number of fold (default 3). - Specific crossvalidation objects can be passed, see + Specific crossvalidation objects can be passed, see scikits.learn.cross_val module for the list of possible objects @@ -534,23 +624,24 @@ class ElasticNetCV(LinearModelCV): a*L1 + b*L2 for:: - + alpha = a + b and rho = a/(a+b) """ - path = staticmethod(enet_path) estimator = ElasticNet def __init__(self, rho=0.5, eps=1e-3, n_alphas=100, alphas=None, - fit_intercept=True, precompute='auto', max_iter=1000, - tol=1e-4, cv=None): + fit_intercept=True, normalize=False, precompute='auto', + max_iter=1000, tol=1e-4, cv=None, overwrite_X=False): self.rho = rho self.eps = eps self.n_alphas = n_alphas self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.precompute = precompute self.max_iter = max_iter self.tol = tol self.cv = cv + self.overwrite_X = overwrite_X diff --git a/scikits/learn/linear_model/least_angle.py b/scikits/learn/linear_model/least_angle.py index 098203a877ba7..2a27c1e69f29a 100644 --- a/scikits/learn/linear_model/least_angle.py +++ b/scikits/learn/linear_model/least_angle.py @@ -15,27 +15,16 @@ from scipy.linalg.lapack import get_lapack_funcs from .base import LinearModel -from ..utils import arrayfuncs +from ..utils import arrayfuncs, as_float_array from ..utils import deprecated from ..cross_val import check_cv from ..externals.joblib import Parallel, delayed -def _safe_normalize(X, overwrite_X): - """Normalize X avoiding divisions by zero""" - norms = np.sqrt(np.sum(X ** 2, axis=0)) - nonzeros = np.flatnonzero(norms) - if not overwrite_X: - X = X.copy() - overwrite_X = True - X[:, nonzeros] /= norms[nonzeros] - return X, norms - - def lars_path(X, y, Xy=None, Gram=None, max_iter=500, alpha_min=0, method='lar', overwrite_X=False, eps=np.finfo(np.float).eps, - overwrite_Gram=False, verbose=False, ): + overwrite_Gram=False, verbose=False): """Compute Least Angle Regression and LASSO path Parameters @@ -332,6 +321,9 @@ class Lars(LinearModel): calculations. If set to 'auto' let us decide. The Gram matrix can also be passed as argument. 
+ overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False eps: float, optional The machine-precision regularization in the computation of the @@ -353,11 +345,11 @@ class Lars(LinearModel): -------- >>> from scikits.learn import linear_model >>> clf = linear_model.Lars(n_nonzero_coefs=1) - >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS + >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) # doctest: +ELLIPSIS Lars(normalize=True, n_nonzero_coefs=1, verbose=False, fit_intercept=True, - eps=..., precompute='auto') - >>> print clf.coef_ - [ 0. -1.] + eps=2.2204460492503131e-16, precompute='auto', overwrite_X=False) + >>> print clf.coef_ # doctest: +ELLIPSIS + [ 0. -1.1111...] References ---------- @@ -369,7 +361,7 @@ class Lars(LinearModel): """ def __init__(self, fit_intercept=True, verbose=False, normalize=True, precompute='auto', n_nonzero_coefs=500, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.fit_intercept = fit_intercept self.verbose = verbose self.normalize = normalize @@ -377,6 +369,7 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True, self.precompute = precompute self.n_nonzero_coefs = n_nonzero_coefs self.eps = eps + self.overwrite_X = overwrite_X def _get_gram(self): # precompute if n_samples > n_features @@ -390,7 +383,7 @@ def _get_gram(self): Gram = None return Gram - def fit(self, X, y, overwrite_X=False, **params): + def fit(self, X, y, overwrite_X=False): """Fit the model using X, y as training data. parameters @@ -406,12 +399,15 @@ def fit(self, X, y, overwrite_X=False, **params): self : object returns an instance of self. """ - self._set_params(**params) X = np.atleast_2d(X) y = np.atleast_1d(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X = as_float_array(X, self.overwrite_X) + + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) alpha = getattr(self, 'alpha', 0.) if hasattr(self, 'n_nonzero_coefs'): alpha = 0. # n_nonzero_coefs parametrization takes priority @@ -419,22 +415,17 @@ def fit(self, X, y, overwrite_X=False, **params): else: max_iter = self.max_iter - if self.normalize: - X, norms = _safe_normalize(X, overwrite_X) - Gram = self._get_gram() self.alphas_, self.active_, self.coef_path_ = lars_path(X, y, - Gram=Gram, overwrite_X=overwrite_X, + Gram=Gram, overwrite_X=self.overwrite_X, overwrite_Gram=True, alpha_min=alpha, method=self.method, verbose=self.verbose, max_iter=max_iter, eps=self.eps) - if self.normalize: - self.coef_path_ /= norms[:, np.newaxis] self.coef_ = self.coef_path_[:, -1] - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -458,6 +449,10 @@ class LassoLars(Lars): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -488,9 +483,10 @@ class LassoLars(Lars): >>> clf = linear_model.LassoLars(alpha=0.01) >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS LassoLars(normalize=True, verbose=False, fit_intercept=True, max_iter=500, - eps=..., precompute='auto', alpha=0.01) - >>> print clf.coef_ - [ 0. 
-0.96325765] + eps=2.2204460492503131e-16, precompute='auto', alpha=0.01, + overwrite_X=False) + >>> print clf.coef_ # doctest: +ELLIPSIS + [ 0. -0.963257...] References ---------- @@ -503,7 +499,7 @@ class LassoLars(Lars): def __init__(self, alpha=1.0, fit_intercept=True, verbose=False, normalize=True, precompute='auto', max_iter=500, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.alpha = alpha self.fit_intercept = fit_intercept self.max_iter = max_iter @@ -511,6 +507,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False, self.normalize = normalize self.method = 'lasso' self.precompute = precompute + self.overwrite_X = overwrite_X self.eps = eps @@ -632,6 +629,10 @@ class LarsCV(LARS): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -674,17 +675,18 @@ class LarsCV(LARS): def __init__(self, fit_intercept=True, verbose=False, max_iter=500, normalize=True, precompute='auto', cv=None, n_jobs=1, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): self.fit_intercept = fit_intercept self.max_iter = max_iter self.verbose = verbose self.normalize = normalize self.precompute = precompute + self.overwrite_X = overwrite_X self.cv = cv self.n_jobs = n_jobs self.eps = eps - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model using X, y as training data. Parameters @@ -700,7 +702,6 @@ def fit(self, X, y, **params): self : object returns an instance of self. """ - self._set_params(**params) X = np.asanyarray(X) n_samples, n_features = X.shape @@ -786,6 +787,10 @@ class LassoLarsCV(LarsCV): Cholesky diagonal factors. Increase this for very ill-conditioned systems. + overwrite_X : boolean, optionnal + If True, X will not be copied + Default is False + Attributes ---------- @@ -854,6 +859,10 @@ class LassoLarsIC(LassoLars): normalize : boolean, optional If True, the regressors X are normalized + overwrite_X : boolean, optionnal + Default is False. + If True, X will be overwritten + precompute : True | False | 'auto' | array-like Whether to use a precomputed Gram matrix to speed up calculations. If set to 'auto' let us decide. The Gram @@ -883,11 +892,12 @@ class LassoLarsIC(LassoLars): -------- >>> from scikits.learn import linear_model >>> clf = linear_model.LassoLarsIC(criterion='bic') - >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) # doctest: +ELLIPSIS + >>> clf.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) # doctest: +ELLIPSIS LassoLarsIC(normalize=True, verbose=False, fit_intercept=True, max_iter=500, - eps=..., precompute='auto', criterion='bic') - >>> print clf.coef_ - [ 0. -0.81649658] + eps=... precompute='auto', criterion='bic', + overwrite_X=False) + >>> print clf.coef_ # doctest: +ELLIPSIS + [ 0. -1.1111...] 
References ---------- @@ -906,7 +916,7 @@ class LassoLarsIC(LassoLars): """ def __init__(self, criterion='aic', fit_intercept=True, verbose=False, normalize=True, precompute='auto', max_iter=500, - eps=np.finfo(np.float).eps): + eps=np.finfo(np.float).eps, overwrite_X=False): if criterion not in ['aic', 'bic']: raise ValueError('criterion should be either bic or aic') self.criterion = criterion @@ -914,10 +924,11 @@ def __init__(self, criterion='aic', fit_intercept=True, verbose=False, self.max_iter = max_iter self.verbose = verbose self.normalize = normalize + self.overwrite_X = overwrite_X self.precompute = precompute self.eps = eps - def fit(self, X, y, overwrite_X=False, **params): + def fit(self, X, y, overwrite_X=False): """Fit the model using X, y as training data. parameters @@ -933,17 +944,15 @@ def fit(self, X, y, overwrite_X=False, **params): self : object returns an instance of self. """ - self._set_params(**params) - X = np.atleast_2d(X) y = np.atleast_1d(y) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X = as_float_array(X, self.overwrite_X) + X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, + self.fit_intercept, + normalize=self.normalize) max_iter = self.max_iter - if self.normalize: - X, norms = _safe_normalize(X, overwrite_X) - Gram = self._get_gram() alphas_, active_, coef_path_ = lars_path(X, y, @@ -980,5 +989,5 @@ def fit(self, X, y, overwrite_X=False, **params): self.alpha_ = alphas_[n_best] self.coef_ = coef_path_[:, n_best] - self._set_intercept(Xmean, ymean) + self._set_intercept(Xmean, ymean, Xstd) return self diff --git a/scikits/learn/linear_model/omp.py b/scikits/learn/linear_model/omp.py index a9afba929ed3e..3c6e7bda6d603 100644 --- a/scikits/learn/linear_model/omp.py +++ b/scikits/learn/linear_model/omp.py @@ -13,13 +13,14 @@ from .base import LinearModel from ..utils.arrayfuncs import solve_triangular +from ..utils import as_float_array premature = """ Orthogonal matching pursuit ended prematurely due to linear dependence in the dictionary. The requested precision might not have been met. """ -def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): +def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_X=False): """Orthogonal Matching Pursuit step using the Cholesky decomposition. Parameters: @@ -36,7 +37,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): eps: float Targeted squared error, if not None overrides n_nonzero_coefs. - overwrite_x: bool, + overwrite_X: bool, Whether the design matrix X can be overwritten by the algorithm. This is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. @@ -51,7 +52,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): vector """ - if not overwrite_x: + if not overwrite_X: X = X.copy('F') else: # even if we are allowed to overwrite, still copy it if bad order X = np.asfortranarray(X) @@ -102,7 +103,7 @@ def _cholesky_omp(X, y, n_nonzero_coefs, eps=None, overwrite_x=False): def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, - overwrite_gram=False, overwrite_xy=False): + overwrite_gram=False, overwrite_Xy=False): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the the Cholesky decomposition method. @@ -129,7 +130,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. 
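
In the OMP helpers the lowercase ``overwrite_x``/``overwrite_xy`` keywords are renamed to ``overwrite_X``/``overwrite_Xy``. An illustrative call with the new spelling (assuming ``orthogonal_mp`` is exported from ``scikits.learn.linear_model``)::

    import numpy as np
    from scikits.learn.linear_model import orthogonal_mp

    X = np.asfortranarray(np.random.randn(50, 30))
    y = np.random.randn(50)

    coef = orthogonal_mp(X, y, n_nonzero_coefs=5, precompute_gram=False,
                         overwrite_X=False)
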
- overwrite_xy: bool, + overwrite_Xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. Returns: @@ -147,7 +148,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, else: Gram = np.asfortranarray(Gram) - if not overwrite_xy: + if not overwrite_Xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps @@ -202,7 +203,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, - overwrite_x=False): + overwrite_X=False): """Orthogonal Matching Pursuit (OMP) Solves n_targets Orthogonal Matching Pursuit problems. @@ -234,7 +235,7 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, Whether to perform precomputations. Improves performance when n_targets or n_samples is very large. - overwrite_x: bool, + overwrite_X: bool, Whether the design matrix X can be overwritten by the algorithm. This is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. @@ -266,13 +267,13 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, X, y = map(np.asanyarray, (X, y)) if y.ndim == 1: y = y[:, np.newaxis] - if not overwrite_x: + if not overwrite_X: X = X.copy('F') - overwrite_x = True + overwrite_X = True else: X = np.asfortranarray(X) if y.shape[1] > 1: # subsequent targets will be affected - overwrite_x = False + overwrite_X = False if n_nonzero_coefs == None and eps == None: n_nonzero_coefs = int(0.1 * X.shape[1]) if eps is not None and eps < 0: @@ -293,20 +294,20 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, eps=None, precompute_gram=False, else: norms_squared = None return orthogonal_mp_gram(G, Xy, n_nonzero_coefs, eps, norms_squared, - overwrite_gram=overwrite_x, - overwrite_xy=True) + overwrite_gram=overwrite_X, + overwrite_Xy=True) coef = np.zeros((X.shape[1], y.shape[1])) for k in xrange(y.shape[1]): x, idx = _cholesky_omp(X, y[:, k], n_nonzero_coefs, eps, - overwrite_x=overwrite_x) + overwrite_X=overwrite_X) coef[idx, k] = x return np.squeeze(coef) def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, norms_squared=None, overwrite_gram=False, - overwrite_xy=False): + overwrite_Xy=False): """Gram Orthogonal Matching Pursuit (OMP) Solves n_targets Orthogonal Matching Pursuit problems using only @@ -335,7 +336,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. - overwrite_xy: bool, + overwrite_Xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. Returns @@ -385,7 +386,7 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, eps=None, x, idx = _gram_omp(Gram, Xy[:, k], n_nonzero_coefs, norms_squared[k] if eps is not None else None, eps, overwrite_gram=overwrite_gram, - overwrite_xy=overwrite_xy) + overwrite_Xy=overwrite_Xy) coef[idx, k] = x return np.squeeze(coef) @@ -416,6 +417,21 @@ class OrthogonalMatchingPursuit(LinearModel): very large. Note that if you already have such matrices, you can pass them directly to the fit method. + overwrite_X: bool, + Whether the design matrix X can be overwritten by the algorithm. + This is only helpful if X is already Fortran-ordered, otherwise a + copy is made anyway. + + overwrite_gram: bool, + Whether the gram matrix can be overwritten by the algorithm. This + is only helpful if it is already Fortran-ordered, otherwise a copy + is made anyway. 
+ + overwrite_Xy: bool, + Whether the covariance vector Xy can be overwritten by the + algorithm. + + Attributes ---------- coef_: array, shape = (n_features,) or (n_features, n_targets) @@ -445,17 +461,20 @@ class OrthogonalMatchingPursuit(LinearModel): LassoLars """ - - def __init__(self, n_nonzero_coefs=None, eps=None, fit_intercept=True, - normalize=True, precompute_gram=False): + def __init__(self, overwrite_X=False, overwrite_gram=False, + overwrite_Xy=False, n_nonzero_coefs=None, eps=None, + fit_intercept=True, normalize=True, precompute_gram=False): self.n_nonzero_coefs = n_nonzero_coefs self.eps = eps self.fit_intercept = fit_intercept self.normalize = normalize self.precompute_gram = precompute_gram + self.overwrite_gram = overwrite_gram + self.overwrite_Xy = overwrite_Xy + self.overwrite_X = overwrite_X + self.eps = eps - def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, - overwrite_gram=False, overwrite_xy=False, **params): + def fit(self, X, y, Gram=None, Xy=None): """Fit the model using X, y as training data. Parameters @@ -473,38 +492,20 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, (optional) Input targets multiplied by X: X.T * y - overwrite_x: bool, - Whether the design matrix X can be overwritten by the algorithm. - This is only helpful if X is already Fortran-ordered, otherwise a - copy is made anyway. - - overwrite_gram: bool, - Whether the gram matrix can be overwritten by the algorithm. This - is only helpful if it is already Fortran-ordered, otherwise a copy - is made anyway. - - overwrite_xy: bool, - Whether the covariance vector Xy can be overwritten by the - algorithm. Returns ------- self: object returns an instance of self. """ - self._set_params(**params) - X = np.atleast_2d(X) y = np.atleast_1d(y) + n_features = X.shape[1] + X = as_float_array(X, self.overwrite_X) - n_samples, n_features = X.shape - - X, y, Xmean, ymean = self._center_data(X, y, self.fit_intercept) - - if self.normalize: - norms = np.sqrt(np.sum(X ** 2, axis=0)) - nonzeros = np.flatnonzero(norms) - X[:, nonzeros] /= norms[nonzeros] + X, y, X_mean, y_mean, X_std = self._center_data(X, y, + self.fit_intercept, + self.normalize) if self.n_nonzero_coefs == None and self.eps is None: self.n_nonzero_coefs = int(0.1 * n_features) @@ -512,35 +513,32 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, if Gram is not None: Gram = np.atleast_2d(Gram) - if not overwrite_gram: + if not self.overwrite_gram: overwrite_gram = True Gram = Gram.copy('F') else: Gram = np.asfortranarray(Gram) + overwrite_gram = self.overwrite_gram if y.shape[1] > 1: # subsequent targets will be affected overwrite_gram = False if Xy is None: Xy = np.dot(X.T, y) else: - if not overwrite_xy: + if not self.overwrite_Xy: Xy = Xy.copy() if self.normalize: - if Xy.ndim > 1: - Xy /= norms.reshape((n_features, -1)) + if len(Xy.shape) == 1: + Xy /= X_std else: - Xy /= norms + Xy /= X_std[:, np.newaxis] if self.normalize: - Gram /= norms - Gram /= norms[:, np.newaxis] - - if self.eps is not None: - norms_sq = np.sum((y ** 2), axis=0) - else: - norms_sq = None + Gram /= X_std + Gram /= X_std[:, np.newaxis] + norms_sq = np.sum(y ** 2, axis=0) if self.eps is not None else None self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs, self.eps, norms_sq, overwrite_gram, True).T @@ -549,10 +547,8 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False, if precompute_gram == 'auto': precompute_gram = X.shape[0] > X.shape[1] self.coef_ = orthogonal_mp(X, y, self.n_nonzero_coefs, self.eps, - 
precompute_gram=precompute_gram, - overwrite_x=overwrite_x).T + precompute_gram=self.precompute_gram, + overwrite_X=self.overwrite_X).T - if self.normalize: - self.coef_ /= norms - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/scikits/learn/linear_model/ridge.py b/scikits/learn/linear_model/ridge.py index d295a412b9c5a..e17b1d0b523f8 100644 --- a/scikits/learn/linear_model/ridge.py +++ b/scikits/learn/linear_model/ridge.py @@ -9,7 +9,7 @@ from .base import LinearModel from ..utils.extmath import safe_sparse_dot -from ..utils import safe_asanyarray +from ..utils import safe_asanyarray, as_float_array from ..preprocessing import LabelBinarizer from ..grid_search import GridSearchCV @@ -137,6 +144,13 @@ class Ridge(LinearModel): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + + overwrite_X : boolean, optional + If True, X will not be copied. + Default is False. + tol: float Precision of the solution. @@ -156,15 +163,19 @@ class Ridge(LinearModel): >>> X = np.random.randn(n_samples, n_features) >>> clf = Ridge(alpha=1.0) >>> clf.fit(X, y) - Ridge(alpha=1.0, tol=0.001, fit_intercept=True) + Ridge(normalize=False, alpha=1.0, overwrite_X=False, tol=0.001, + fit_intercept=True) """ - def __init__(self, alpha=1.0, fit_intercept=True, tol=1e-3): + def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, + overwrite_X=False, tol=1e-3): self.alpha = alpha self.fit_intercept = fit_intercept + self.normalize = normalize + self.overwrite_X = overwrite_X self.tol = tol - def fit(self, X, y, sample_weight=1.0, solver='auto', **params): + def fit(self, X, y, sample_weight=1.0, solver='auto'): """ Fit Ridge regression model @@ -191,17 +202,17 @@ def fit(self, X, y, sample_weight=1.0, solver='auto', **params): ------- self : returns an instance of self. """ - self._set_params(**params) - X = safe_asanyarray(X, dtype=np.float) y = np.asanyarray(y, dtype=np.float) + X = as_float_array(X, self.overwrite_X) - X, y, X_mean, y_mean = \ - LinearModel._center_data(X, y, self.fit_intercept) + X, y, X_mean, y_mean, X_std = \ + self._center_data(X, y, self.fit_intercept, + self.normalize) self.coef_ = ridge_regression(X, y, self.alpha, sample_weight, solver, self.tol) - self._set_intercept(X_mean, y_mean) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -221,6 +232,9 @@ class RidgeClassifier(Ridge): to false, no intercept will be used in calculations (e.g. data is expected to be already centered).
+ normalize : boolean, optional + If True, the regressors X are normalized + Attributes ---------- @@ -321,12 +335,14 @@ class _RidgeGCV(LinearModel): http://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf """ - def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, - score_func=None, loss_func=None): + def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True, normalize=False, + score_func=None, loss_func=None, overwrite_X=False): self.alphas = np.asanyarray(alphas) self.fit_intercept = fit_intercept + self.normalize = normalize self.score_func = score_func self.loss_func = loss_func + self.overwrite_X = overwrite_X def _pre_compute(self, X, y): # even if X is very sparse, K is usually very dense @@ -384,8 +400,10 @@ def fit(self, X, y, sample_weight=1.0): y = np.asanyarray(y, dtype=np.float) n_samples = X.shape[0] + X = as_float_array(X, self.overwrite_X) - X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept) + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, + self.fit_intercept, self.normalize) K, v, Q = self._pre_compute(X, y) n_y = 1 if len(y.shape) == 1 else y.shape[1] @@ -413,7 +431,7 @@ def fit(self, X, y, sample_weight=1.0): self.dual_coef_ = C[best] self.coef_ = safe_sparse_dot(self.dual_coef_.T, X) - self._set_intercept(Xmean, ymean) + self._set_intercept(X_mean, y_mean, X_std) return self @@ -440,6 +458,9 @@ class RidgeCV(LinearModel): to false, no intercept will be used in calculations (e.g. data is expected to be already centered). + normalize : boolean, optional + If True, the regressors X are normalized + loss_func: callable, optional function that takes 2 arguments and compares them in order to evaluate the performance of prediciton (small is good) @@ -456,14 +477,15 @@ class RidgeCV(LinearModel): """ def __init__(self, alphas=np.array([0.1, 1.0, 10.0]), fit_intercept=True, - score_func=None, loss_func=None, cv=None): + normalize=False, score_func=None, loss_func=None, cv=None): self.alphas = alphas self.fit_intercept = fit_intercept + self.normalize = normalize self.score_func = score_func self.loss_func = loss_func self.cv = cv - def fit(self, X, y, sample_weight=1.0, **params): + def fit(self, X, y, sample_weight=1.0): """Fit Ridge regression model Parameters @@ -485,8 +507,6 @@ def fit(self, X, y, sample_weight=1.0, **params): ------- self : Returns self. """ - self._set_params(**params) - if self.cv is None: estimator = _RidgeGCV(self.alphas, self.fit_intercept, self.score_func, self.loss_func) @@ -512,7 +532,7 @@ def fit(self, X, y, sample_weight=1.0, **params): class RidgeClassifierCV(RidgeCV): - def fit(self, X, y, sample_weight=1.0, class_weight=None, **params): + def fit(self, X, y, sample_weight=1.0, class_weight=None): """ Fit the ridge classifier. @@ -538,15 +558,13 @@ def fit(self, X, y, sample_weight=1.0, class_weight=None, **params): self : object Returns self. 
""" - self._set_params(**params) if class_weight is None: class_weight = {} sample_weight2 = np.array([class_weight.get(k, 1.0) for k in y]) self.label_binarizer = LabelBinarizer() Y = self.label_binarizer.fit_transform(y) RidgeCV.fit(self, X, Y, - sample_weight=sample_weight * sample_weight2, - cv=self.cv) + sample_weight=sample_weight * sample_weight2) return self def decision_function(self, X): diff --git a/scikits/learn/linear_model/sparse/coordinate_descent.py b/scikits/learn/linear_model/sparse/coordinate_descent.py index 38ef4245f6be7..80e150d54827d 100644 --- a/scikits/learn/linear_model/sparse/coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/coordinate_descent.py @@ -7,7 +7,7 @@ import warnings import numpy as np -from scipy import sparse +import scipy.sparse as sp from ..base import LinearModel from . import cd_fast_sparse @@ -40,14 +40,17 @@ class ElasticNet(LinearModel): The parameter rho corresponds to alpha in the glmnet R package while alpha corresponds to the lambda parameter in glmnet. """ - - def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False): + def __init__(self, alpha=1.0, rho=0.5, fit_intercept=False, + normalize=False, max_iter=1000, tol=1e-4): if fit_intercept: raise NotImplementedError("fit_intercept=True is not implemented") self.alpha = alpha self.rho = rho self.fit_intercept = fit_intercept + self.normalize = normalize self.intercept_ = 0.0 + self.max_iter = max_iter + self.tol = tol self._set_coef(None) def _set_coef(self, coef_): @@ -56,16 +59,15 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) - def fit(self, X, y, max_iter=1000, tol=1e-4, **params): + def fit(self, X, y): """Fit current model with coordinate descent X is expected to be a sparse matrix. For maximum efficiency, use a sparse matrix in CSC format (scipy.sparse.csc_matrix) """ - self._set_params(**params) - X = sparse.csc_matrix(X) + X = sp.csc_matrix(X) y = np.asanyarray(y, dtype=np.float64) # NOTE: we are explicitly not centering the data the naive way to @@ -83,7 +85,7 @@ def fit(self, X, y, max_iter=1000, tol=1e-4, **params): coef_, self.dual_gap_, self.eps_ = \ cd_fast_sparse.enet_coordinate_descent( self.coef_, alpha, beta, X_data, X.indices, X.indptr, y, - max_iter, tol) + self.max_iter, self.tol) # update self.coef_ and self.sparse_coef_ consistently self._set_coef(coef_) @@ -109,8 +111,8 @@ def predict(self, X): array, shape = [n_samples] with the predicted real values """ # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) return np.ravel(np.dot(self.sparse_coef_, X.T).todense() + self.intercept_) @@ -132,7 +134,8 @@ class Lasso(ElasticNet): data is assumed to be already centered. 
""" - - def __init__(self, alpha=1.0, fit_intercept=False): - super(Lasso, self).__init__(alpha=alpha, rho=1.0, - fit_intercept=fit_intercept) + def __init__(self, alpha=1.0, fit_intercept=False, normalize=False, + max_iter=1000, tol=1e-4): + super(Lasso, self).__init__( + alpha=alpha, rho=1.0, fit_intercept=fit_intercept, + normalize=normalize, max_iter=max_iter, tol=tol) diff --git a/scikits/learn/linear_model/sparse/logistic.py b/scikits/learn/linear_model/sparse/logistic.py index 7b41d2d6cf73e..6f01d7caca9bb 100644 --- a/scikits/learn/linear_model/sparse/logistic.py +++ b/scikits/learn/linear_model/sparse/logistic.py @@ -6,6 +6,7 @@ """ import numpy as np +import scipy.sparse as sp from ...base import ClassifierMixin from ...svm.sparse.base import SparseBaseLibLinear @@ -92,8 +93,7 @@ def predict_proba(self, X): The returned estimates for all classes are ordered by the label of classes. """ - import scipy.sparse - X = scipy.sparse.csr_matrix(X) + X = sp.csr_matrix(X) X.data = np.asanyarray(X.data, dtype=np.float64, order='C') probas = csr_predict_prob(X.shape[1], X.data, X.indices, X.indptr, self.raw_coef_, diff --git a/scikits/learn/linear_model/sparse/stochastic_gradient.py b/scikits/learn/linear_model/sparse/stochastic_gradient.py index fd3db80b2953e..10d8dccbeb605 100644 --- a/scikits/learn/linear_model/sparse/stochastic_gradient.py +++ b/scikits/learn/linear_model/sparse/stochastic_gradient.py @@ -4,7 +4,7 @@ """Implementation of Stochastic Gradient Descent (SGD) with sparse data.""" import numpy as np -from scipy import sparse +import scipy.sparse as sp from ...externals.joblib import Parallel, delayed from ..base import BaseSGDClassifier, BaseSGDRegressor @@ -128,13 +128,13 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) def _fit_binary(self, X, y): """Fit a binary classifier. """ # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # encode original class labels as 1 (classes[1]) or -1 (classes[0]). y_new = np.ones(y.shape, dtype=np.float64, order="C") * -1.0 @@ -174,7 +174,7 @@ def _fit_multiclass(self, X, y): all others (OVA: One Versus All). """ # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # get sparse matrix datastructures X_data = np.array(X.data, dtype=np.float64, order="C") @@ -217,8 +217,8 @@ def decision_function(self, X): The signed 'distances' to the hyperplane(s). """ # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) scores = np.asarray(np.dot(X, self.sparse_coef_.T).todense() + self.intercept_) if self.classes.shape[0] == 2: @@ -353,11 +353,11 @@ def _set_coef(self, coef_): self.sparse_coef_ = None else: # sparse representation of the fitted coef for the predict method - self.sparse_coef_ = sparse.csr_matrix(coef_) + self.sparse_coef_ = sp.csr_matrix(coef_) def _fit_regressor(self, X, y): # interprete X as CSR matrix - X = sparse.csr_matrix(X) + X = sp.csr_matrix(X) # get sparse matrix datastructures X_data = np.array(X.data, dtype=np.float64, order="C") @@ -400,8 +400,8 @@ def predict(self, X): Array containing the predicted class labels. 
""" # np.dot only works correctly if both arguments are sparse matrices - if not sparse.issparse(X): - X = sparse.csr_matrix(X) + if not sp.issparse(X): + X = sp.csr_matrix(X) scores = np.asarray(np.dot(X, self.sparse_coef_.T).todense() + self.intercept_).ravel() return scores diff --git a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py index 3abecce590af6..90e5cee442698 100644 --- a/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py +++ b/scikits/learn/linear_model/sparse/tests/test_coordinate_descent.py @@ -1,5 +1,5 @@ import numpy as np -from scipy import sparse +import scipy.sparse as sp from numpy.testing import assert_array_almost_equal from numpy.testing import assert_almost_equal from numpy.testing import assert_equal @@ -12,7 +12,7 @@ def test_sparse_predict(): """Check that the predict method works with dense coef_ and sparse X""" - X = sparse.lil_matrix((3, 2)) + X = sp.lil_matrix((3, 2)) X[0, 0] = 1 X[0, 1] = 0.5 X[1, 0] = -1 @@ -25,7 +25,7 @@ def test_sparse_predict(): def test_lasso_zero(): """Check that the sparse lasso can handle zero data without crashing""" - X = sparse.csc_matrix((3, 1)) + X = sp.csc_matrix((3, 1)) y = [0, 0, 0] T = np.array([[1], [2], [3]]) clf = SparseLasso().fit(X, y) @@ -50,8 +50,8 @@ def test_enet_toy_list_input(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = SparseENet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000) + clf = SparseENet(alpha=0.5, rho=0.3, max_iter=1000) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -69,14 +69,14 @@ def test_enet_toy_explicit_sparse_input(): """Test ElasticNet for various parameters of alpha and rho with sparse X""" # training samples - X = sparse.lil_matrix((3, 1)) + X = sp.lil_matrix((3, 1)) X[0, 0] = -1 # X[1, 0] = 0 X[2, 0] = 1 Y = [-1, 0, 1] # just a straight line (the identity function) # test samples - T = sparse.lil_matrix((3, 1)) + T = sp.lil_matrix((3, 1)) T[0, 0] = 2 T[1, 0] = 3 T[2, 0] = 4 @@ -89,8 +89,8 @@ def test_enet_toy_explicit_sparse_input(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = SparseENet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000) + clf = SparseENet(alpha=0.5, rho=0.3, max_iter=1000) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -132,14 +132,16 @@ def test_sparse_enet_not_as_toy_dataset(): X_train, X_test = X[n_samples / 2:], X[:n_samples / 2] y_train, y_test = y[n_samples / 2:], y[:n_samples / 2] - s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False) - s_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version - d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False) - d_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + d_clf.fit(X_train, y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) assert d_clf.score(X_test, y_test) > 0.85 @@ -158,14 +160,15 @@ def 
test_sparse_lasso_not_as_toy_dataset(): X_train, X_test = X[n_samples / 2:], X[:n_samples / 2] y_train, y_test = y[n_samples / 2:], y[:n_samples / 2] - s_clf = SparseLasso(alpha=0.1, fit_intercept=False) - s_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + s_clf = SparseLasso(alpha=0.1, fit_intercept=False, + max_iter=max_iter, tol=1e-7) + s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version - d_clf = DenseLasso(alpha=0.1, fit_intercept=False) - d_clf.fit(X_train, y_train, max_iter=max_iter, tol=1e-7) + d_clf = DenseLasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) + d_clf.fit(X_train, y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) assert d_clf.score(X_test, y_test) > 0.85 diff --git a/scikits/learn/linear_model/tests/test_coordinate_descent.py b/scikits/learn/linear_model/tests/test_coordinate_descent.py index 34c92fc4e7d78..01ab83a6c48dc 100644 --- a/scikits/learn/linear_model/tests/test_coordinate_descent.py +++ b/scikits/learn/linear_model/tests/test_coordinate_descent.py @@ -83,20 +83,23 @@ def test_enet_toy(): assert_array_almost_equal(pred, [2, 3, 4]) assert_almost_equal(clf.dual_gap_, 0) - clf = ElasticNet(alpha=0.5, rho=0.3) - clf.fit(X, Y, max_iter=1000, precompute=False) + clf = ElasticNet(alpha=0.5, rho=0.3, max_iter=1000, + precompute=False) + clf.fit(X, Y) pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) - clf.fit(X, Y, max_iter=1000, precompute=True) # with Gram + clf.set_params(max_iter=1000, precompute=True) + clf.fit(X, Y) # with Gram pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) assert_almost_equal(clf.dual_gap_, 0) - clf.fit(X, Y, max_iter=1000, precompute=np.dot(X.T, X)) # with Gram + clf.set_params(max_iter=1000, precompute=np.dot(X.T, X)) + clf.fit(X, Y) # with Gram pred = clf.predict(T) assert_array_almost_equal(clf.coef_, [0.50819], decimal=3) assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3) @@ -121,11 +124,11 @@ def test_lasso_path(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = LassoCV(n_alphas=100, eps=1e-3).fit(X, y, max_iter=max_iter) + clf = LassoCV(n_alphas=100, eps=1e-3, max_iter=max_iter).fit(X, y) assert_almost_equal(clf.alpha, 0.011, 2) - clf = LassoCV(n_alphas=100, eps=1e-3) - clf.fit(X, y, max_iter=max_iter, precompute=True) + clf = LassoCV(n_alphas=100, eps=1e-3, max_iter=max_iter, precompute=True) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.011, 2) # test set @@ -145,12 +148,13 @@ def test_enet_path(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5) - clf.fit(X, y, max_iter=max_iter) + clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5, max_iter=max_iter) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.00779, 2) - clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5) - clf.fit(X, y, max_iter=max_iter, precompute=True) + clf = ElasticNetCV(n_alphas=100, eps=1e-3, rho=0.95, cv=5, + max_iter=max_iter, precompute=True) + clf.fit(X, y) assert_almost_equal(clf.alpha, 0.00779, 2) # test set @@ -170,8 +174,9 @@ def test_path_parameters(): X = random_state.randn(n_samples, n_features) y = np.dot(X, w) - clf = ElasticNetCV(n_alphas=100, 
eps=1e-3, rho=0.95) - clf.fit(X, y, max_iter=max_iter, rho=0.5, n_alphas=50) # new params + clf = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter, + rho=0.5) + clf.fit(X, y) # new params assert_almost_equal(0.5, clf.rho) assert_equal(50, clf.n_alphas) assert_equal(50, len(clf.alphas)) diff --git a/scikits/learn/linear_model/tests/test_least_angle.py b/scikits/learn/linear_model/tests/test_least_angle.py index 7e84ce37ea56f..ceadcb33cf65c 100644 --- a/scikits/learn/linear_model/tests/test_least_angle.py +++ b/scikits/learn/linear_model/tests/test_least_angle.py @@ -104,20 +104,32 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): X = 3 * diabetes.data alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') - lasso_cd = linear_model.Lasso(fit_intercept=False) - for (c, a) in zip(lasso_path.T, alphas): + lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8) + for c, a in zip(lasso_path.T, alphas): lasso_cd.alpha = a - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(c - lasso_cd.coef_) assert error < 0.01 # similar test, with the classifiers for alpha in np.linspace(1e-2, 1 - 1e-2): clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y) - clf2 = linear_model.Lasso(alpha=alpha).fit(X, y, tol=1e-8) + clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8, + normalize=False).fit(X, y) err = np.linalg.norm(clf1.coef_ - clf2.coef_) assert err < 1e-3 + # same test, with normalized data + X = diabetes.data + alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso') + lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True, + tol=1e-8) + for c, a in zip(lasso_path.T, alphas): + lasso_cd.alpha = a + lasso_cd.fit(X, y) + error = np.linalg.norm(c - lasso_cd.coef_) + assert error < 0.01 + def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): """ @@ -129,12 +141,25 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): for alphas_min in alphas_min: alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', alpha_min=0.9) - lasso_cd = linear_model.Lasso(fit_intercept=False) + lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8) lasso_cd.alpha = alphas[-1] - lasso_cd.fit(X, y, tol=1e-8) + lasso_cd.fit(X, y) error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) assert error < 0.01 + alphas_min = [10, 0.9, 1e-4] + # same test, with normalization + for alphas_min in alphas_min: + alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso', + alpha_min=0.9) + lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True, + tol=1e-8) + lasso_cd.alpha = alphas[-1] + lasso_cd.fit(X, y) + error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) + assert error < 0.01 + + def test_lars_add_features(verbose=False): """ diff --git a/scikits/learn/linear_model/tests/test_ridge.py b/scikits/learn/linear_model/tests/test_ridge.py index 1c229308cc04d..d5ca7fb31b849 100644 --- a/scikits/learn/linear_model/tests/test_ridge.py +++ b/scikits/learn/linear_model/tests/test_ridge.py @@ -101,15 +101,15 @@ def test_ridge_vs_lstsq(): y = np.random.randn(n_samples) X = np.random.randn(n_samples, n_features) - ridge = Ridge(alpha=0.) 
- ols = LinearRegression() + ridge = Ridge(alpha=0., fit_intercept=False) + ols = LinearRegression(fit_intercept=False) ridge.fit(X, y) ols.fit (X, y) assert_almost_equal(ridge.coef_, ols.coef_) - ridge.fit(X, y, fit_intercept=False) - ols.fit (X, y, fit_intercept=False) + ridge.fit(X, y) + ols.fit (X, y) assert_almost_equal(ridge.coef_, ols.coef_) def _test_ridge_loo(filter_): @@ -182,7 +182,8 @@ def _test_ridge_cv(filter_): assert_equal(type(ridge_cv.intercept_), np.float64) cv = KFold(n_samples, 5) - ridge_cv.fit(filter_(X_diabetes), y_diabetes, cv=cv) + ridge_cv.set_params(cv=cv) + ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) assert_equal(len(ridge_cv.coef_.shape), 1) @@ -216,10 +217,10 @@ def _test_ridge_classifiers(filter_): y_pred = clf.predict(filter_(X_iris)) assert np.mean(y_iris == y_pred) >= 0.8 - clf = RidgeClassifierCV() n_samples = X_iris.shape[0] cv = KFold(n_samples, 5) - clf.fit(filter_(X_iris), y_iris, cv=cv) + clf = RidgeClassifierCV(cv=cv) + clf.fit(filter_(X_iris), y_iris) y_pred = clf.predict(filter_(X_iris)) assert np.mean(y_iris == y_pred) >= 0.8 diff --git a/scikits/learn/manifold/isomap.py b/scikits/learn/manifold/isomap.py index df7b298b4eb5b..c78fed1707512 100644 --- a/scikits/learn/manifold/isomap.py +++ b/scikits/learn/manifold/isomap.py @@ -122,7 +122,7 @@ def reconstruction_error(self): evals = self.kernel_pca_.lambdas_ return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0] - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Compute the embedding vectors for data X Parameters @@ -134,11 +134,10 @@ def fit(self, X, y=None, **params): ------- self : returns an instance of self. """ - self._set_params(**params) self._fit_transform(X) return self - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Fit the model from data in X and transform X. Parameters @@ -151,7 +150,6 @@ def fit_transform(self, X, y=None, **params): ------- X_new: array-like, shape (n_samples, out_dim) """ - self._set_params(**params) self._fit_transform(X) return self.embedding_ diff --git a/scikits/learn/manifold/locally_linear.py b/scikits/learn/manifold/locally_linear.py index c12e7ed49c190..8daa86a1786a2 100644 --- a/scikits/learn/manifold/locally_linear.py +++ b/scikits/learn/manifold/locally_linear.py @@ -458,7 +458,7 @@ def _fit_transform(self, X): max_iter=self.max_iter, method=self.method, hessian_tol=self.hessian_tol, modified_tol=self.modified_tol) - def fit(self, X, y=None, **params): + def fit(self, X, y=None): """Compute the embedding vectors for data X Parameters @@ -470,11 +470,10 @@ def fit(self, X, y=None, **params): ------- self : returns an instance of self. """ - self._set_params(**params) self._fit_transform(X) return self - def fit_transform(self, X, y=None, **params): + def fit_transform(self, X, y=None): """Compute the embedding vectors for data X and transform X. Parameters @@ -486,11 +485,10 @@ def fit_transform(self, X, y=None, **params): ------- X_new: array-like, shape (n_samples, out_dim) """ - self._set_params(**params) self._fit_transform(X) return self.embedding_ - def transform(self, X, **params): + def transform(self, X): """ Transform new points into embedding space. 
@@ -507,7 +505,6 @@ def transform(self, X, **params): Because of scaling performed by this method, it is discouraged to use it together with methods that are not scale-invariant (like SVMs) """ - self._set_params(**params) X = np.atleast_2d(X) if not hasattr(self, 'ball_tree_'): raise ValueError('The model is not fitted') diff --git a/scikits/learn/manifold/tests/test_locally_linear.py b/scikits/learn/manifold/tests/test_locally_linear.py index 25bff361dad3e..01007484d72e8 100644 --- a/scikits/learn/manifold/tests/test_locally_linear.py +++ b/scikits/learn/manifold/tests/test_locally_linear.py @@ -31,7 +31,8 @@ def test_lle_simple_grid(): assert_lower(reconstruction_error, tol) for solver in eigen_solvers: - clf.fit(X, eigen_solver=solver) + clf.set_params(eigen_solver=solver) + clf.fit(X) assert clf.embedding_.shape[1] == out_dim reconstruction_error = np.linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 @@ -53,7 +54,7 @@ def test_lle_manifold(): X = np.c_[X, X[:, 0] ** 2 / 20] out_dim = 2 clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=out_dim) - tol = .5 + tol = 1.5 N = neighbors.kneighbors_graph(X, clf.n_neighbors, mode='barycenter').toarray() @@ -61,7 +62,8 @@ def test_lle_manifold(): assert_lower(reconstruction_error, tol) for solver in eigen_solvers: - clf.fit(X, eigen_solver=solver) + clf.set_params(eigen_solver=solver) + clf.fit(X) assert clf.embedding_.shape[1] == out_dim reconstruction_error = np.linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 diff --git a/scikits/learn/neighbors.py b/scikits/learn/neighbors.py index 04361fbb98e01..616acacf18e8a 100644 --- a/scikits/learn/neighbors.py +++ b/scikits/learn/neighbors.py @@ -111,7 +111,7 @@ def __init__(self, n_neighbors=5, algorithm='auto', leaf_size=20): self.leaf_size = leaf_size self.algorithm = algorithm - def fit(self, X, y, **params): + def fit(self, X, y): """Fit the model using X, y as training data Parameters @@ -122,14 +122,11 @@ def fit(self, X, y, **params): y : {array-like, sparse matrix}, shape = [n_samples] Target values, array of integer values. - params : list of keyword, optional - Overwrite keywords from __init__ """ X = safe_asanyarray(X) if y is None: raise ValueError("y must not be None") self._y = np.asanyarray(y) - self._set_params(**params) if issparse(X): self.ball_tree = None @@ -142,7 +139,7 @@ def fit(self, X, y, **params): self._fit_X = X return self - def kneighbors(self, X, return_distance=True, **params): + def kneighbors(self, X, n_neighbors=None, return_distance=True): """Finds the K-neighbors of a point. Returns distance @@ -193,21 +190,22 @@ class from an array representing our data set and ask who's [2]]...) """ - self._set_params(**params) X = atleast2d_or_csr(X) + if n_neighbors is None: + n_neighbors = self.n_neighbors if self.ball_tree is None: dist = euclidean_distances(X, self._fit_X, squared=True) # XXX: should be implemented with a partial sort - neigh_ind = dist.argsort(axis=1)[:, :self.n_neighbors] + neigh_ind = dist.argsort(axis=1)[:, :n_neighbors] if not return_distance: return neigh_ind else: return dist.T[neigh_ind], neigh_ind else: - return self.ball_tree.query(X, self.n_neighbors, + return self.ball_tree.query(X, n_neighbors, return_distance=return_distance) - def predict(self, X, **params): + def predict(self, X): """Predict the class labels for the provided data Parameters @@ -225,7 +223,6 @@ def predict(self, X, **params): List of class labels (one for each data sample). 
""" X = atleast2d_or_csr(X) - self._set_params(**params) # get neighbors neigh_ind = self.kneighbors(X, return_distance=False) @@ -295,7 +292,7 @@ def __init__(self, n_neighbors=5, mode='mean', algorithm='auto', self.mode = mode self.algorithm = algorithm - def predict(self, X, **params): + def predict(self, X): """Predict the target for the provided data Parameters @@ -313,7 +310,6 @@ def predict(self, X, **params): List of target values (one for each data sample). """ X = atleast2d_or_csr(X) - self._set_params(**params) # compute nearest neighbors neigh_ind = self.kneighbors(X, return_distance=False) diff --git a/scikits/learn/pls.py b/scikits/learn/pls.py index 126918ed1b8ae..ab7602ed5a062 100644 --- a/scikits/learn/pls.py +++ b/scikits/learn/pls.py @@ -201,8 +201,7 @@ def __init__(self, n_components=2, deflation_mode="canonical", mode="A", self.tol = tol self.copy = copy - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): # copy since this will contains the residuals (deflated) matrices if self.copy: X = np.asanyarray(X, dtype=np.float).copy() @@ -472,8 +471,8 @@ class PLSRegression(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> pls2 = PLSRegression() - >>> pls2.fit(X, Y, n_components=2) + >>> pls2 = PLSRegression(n_components=2) + >>> pls2.fit(X, Y) PLSRegression(scale=True, algorithm='nipals', max_iter=500, n_components=2, tol=1e-06, copy=True) >>> Y_pred = pls2.predict(X) @@ -577,8 +576,8 @@ class PLSCanonical(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> plsca = PLSCanonical() - >>> plsca.fit(X, Y, n_components=2) + >>> plsca = PLSCanonical(n_components=2) + >>> plsca.fit(X, Y) PLSCanonical(scale=True, algorithm='nipals', max_iter=500, n_components=2, tol=1e-06, copy=True) >>> X_c, Y_c = plsca.transform(X, Y) @@ -685,8 +684,8 @@ class CCA(_PLS): >>> from scikits.learn.pls import PLSCanonical, PLSRegression, CCA >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]] >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]] - >>> cca = CCA() - >>> cca.fit(X, Y, n_components=1) + >>> cca = CCA(n_components=1) + >>> cca.fit(X, Y) CCA(scale=True, algorithm='nipals', max_iter=500, n_components=1, tol=1e-06, copy=True) >>> X_c, Y_c = cca.transform(X, Y) @@ -762,8 +761,7 @@ def __init__(self, n_components=2, scale=True, copy=True): self.n_components = n_components self.scale = scale - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): # copy since this will contains the centered data if self.copy: X = np.asanyarray(X).copy() diff --git a/scikits/learn/preprocessing/__init__.py b/scikits/learn/preprocessing/__init__.py index 63606d8067bbe..7b5191b690416 100644 --- a/scikits/learn/preprocessing/__init__.py +++ b/scikits/learn/preprocessing/__init__.py @@ -19,7 +19,7 @@ def _mean_and_std(X, axis=0, with_mean=True, with_std=True): """Compute mean and std dev for centering, scaling - Zero valued std components are reseted to 1.0 to avoid NaNs when scaling. + Zero valued std components are reset to 1.0 to avoid NaNs when scaling. 
""" X = np.asanyarray(X) Xr = np.rollaxis(X, axis) @@ -178,13 +178,33 @@ def transform(self, X, y=None, copy=None): X = np.asanyarray(X) if copy: X = X.copy() - # We are taking a view of the X array and modifying it if self.with_mean: X -= self.mean_ if self.with_std: X /= self.std_ return X + def inverse_transform(self, X, copy=None): + """Scale back the data to the original representation + + Parameters + ---------- + X : array-like with shape [n_samples, n_features] + The data used to scale along the features axis. + """ + copy = copy if copy is not None else self.copy + if sp.issparse(X): + raise NotImplementedError( + "Scaling is not yet implement for sparse matrices") + X = np.asanyarray(X) + if copy: + X = X.copy() + if self.with_std: + X *= self.std_ + if self.with_mean: + X += self.mean_ + return X + def normalize(X, norm='l2', axis=1, copy=True): """Normalize a dataset along any axis diff --git a/scikits/learn/preprocessing/tests/test_preprocessing.py b/scikits/learn/preprocessing/tests/test_preprocessing.py index 6cba3d33cd285..446a1c63adf54 100644 --- a/scikits/learn/preprocessing/tests/test_preprocessing.py +++ b/scikits/learn/preprocessing/tests/test_preprocessing.py @@ -35,12 +35,17 @@ def test_scaler(): """Test scaling of dataset along all axis""" # First test with 1D data X = np.random.randn(5) + X_orig_copy = X.copy() scaler = Scaler() X_scaled = scaler.fit(X).transform(X, copy=False) assert_array_almost_equal(X_scaled.mean(axis=0), 0.0) assert_array_almost_equal(X_scaled.std(axis=0), 1.0) + # check inverse transform + X_scaled_back = scaler.inverse_transform(X_scaled) + assert_array_almost_equal(X_scaled_back, X_orig_copy) + # Test with 1D list X = [0., 1., 2, 0.4, 1.] scaler = Scaler() @@ -65,6 +70,12 @@ def test_scaler(): # Check that X has not been copied assert X_scaled is not X + # check inverse transform + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + X_scaled = scale(X, axis=1, with_std=False) assert not np.any(np.isnan(X_scaled)) assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0]) @@ -108,6 +119,11 @@ def test_scaler_without_centering(): # Check that X has not been copied assert X_scaled is not X + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + X_scaled = scale(X, with_mean=False) assert not np.any(np.isnan(X_scaled)) @@ -117,6 +133,11 @@ def test_scaler_without_centering(): # Check that X has not been copied assert X_scaled is not X + X_scaled_back = scaler.inverse_transform(X_scaled) + assert X_scaled_back is not X + assert X_scaled_back is not X_scaled + assert_array_almost_equal(X_scaled_back, X) + def test_normalizer_l1(): rng = np.random.RandomState(0) diff --git a/scikits/learn/qda.py b/scikits/learn/qda.py index e709d3628b8bd..b275410dc50a0 100644 --- a/scikits/learn/qda.py +++ b/scikits/learn/qda.py @@ -63,7 +63,7 @@ class QDA(BaseEstimator, ClassifierMixin): def __init__(self, priors=None): self.priors = np.asarray(priors) if priors is not None else None - def fit(self, X, y, store_covariances=False, tol=1.0e-4, **params): + def fit(self, X, y, store_covariances=False, tol=1.0e-4): """ Fit the QDA model according to the given training data and parameters. 
@@ -78,7 +78,6 @@ def fit(self, X, y, store_covariances=False, tol=1.0e-4, **params): If True the covariance matrices are computed and stored in the self.covariances_ attribute. """ - self._set_params(**params) X = np.asanyarray(X) y = np.asanyarray(y) if X.ndim != 2: diff --git a/scikits/learn/svm/base.py b/scikits/learn/svm/base.py index 67883ac94000a..ed9a1d259194b 100644 --- a/scikits/learn/svm/base.py +++ b/scikits/learn/svm/base.py @@ -71,8 +71,7 @@ def _compute_kernel(self, X): dtype=np.float64, order='C') return X - def fit(self, X, y, class_weight=None, sample_weight=None, cache_size=100., - **params): + def fit(self, X, y, class_weight=None, sample_weight=None, cache_size=100.): """ Fit the SVM model according to the given training data and parameters. @@ -111,7 +110,6 @@ class frequencies. copied. """ - self._set_params(**params) X = np.asanyarray(X, dtype=np.float64, order='C') y = np.asanyarray(y, dtype=np.float64, order='C') @@ -349,7 +347,7 @@ def _get_solver_type(self): + solver_type) return self._solver_type_dict[solver_type] - def fit(self, X, y, class_weight=None, **params): + def fit(self, X, y, class_weight=None): """ Fit the model according to the given training data and parameters. @@ -372,7 +370,6 @@ def fit(self, X, y, class_weight=None, **params): self : object Returns self. """ - self._set_params(**params) self.class_weight, self.class_weight_label = \ _get_class_weight(class_weight, y) diff --git a/scikits/learn/svm/classes.py b/scikits/learn/svm/classes.py index c2db5a0e2ff01..a62de3a6716b8 100644 --- a/scikits/learn/svm/classes.py +++ b/scikits/learn/svm/classes.py @@ -275,7 +275,7 @@ class SVR(BaseLibSVM, RegressorMixin): penalty parameter C of the error term. epsilon : float, optional (default=0.1) - epsilon in the epsilon-SVR model. It specifies the espilon-tube + epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value. @@ -455,9 +455,9 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=1e-3): - BaseLibSVM.__init__(self, 'epsilon_svr', kernel, degree, + BaseLibSVM.__init__(self, 'nu_svr', kernel, degree, gamma, coef0, tol, C, nu, - 0.0, shrinking, probability) + None, shrinking, probability) def fit(self, X, y, sample_weight=None, **params): """ diff --git a/scikits/learn/svm/sparse/base.py b/scikits/learn/svm/sparse/base.py index 6bc983719a2cd..df8a822863d73 100644 --- a/scikits/learn/svm/sparse/base.py +++ b/scikits/learn/svm/sparse/base.py @@ -47,8 +47,7 @@ def __init__(self, impl, kernel, degree, gamma, coef0, # only used in classification self.n_support_ = np.empty(0, dtype=np.int32, order='C') - def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100., - **params): + def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100.): """ Fit the SVM model according to the given training data and parameters. @@ -84,7 +83,6 @@ def fit(self, X, y, class_weight=None, sample_weight=[], cache_size=100., For maximum effiency, use a sparse matrix in csr format (scipy.sparse.csr_matrix) """ - self._set_params(**params) import scipy.sparse X = scipy.sparse.csr_matrix(X) @@ -222,7 +220,7 @@ def predict_proba(self, X): class SparseBaseLibLinear(BaseLibLinear): - def fit(self, X, y, class_weight=None, **params): + def fit(self, X, y, class_weight=None): """ Fit the model using X, y as training data. 
@@ -239,7 +237,6 @@ def fit(self, X, y, class_weight=None, **params): self : object Returns an instance of self. """ - self._set_params(**params) import scipy.sparse X = scipy.sparse.csr_matrix(X) diff --git a/scikits/learn/svm/sparse/classes.py b/scikits/learn/svm/sparse/classes.py index 004b65430d196..cbd5697316848 100644 --- a/scikits/learn/svm/sparse/classes.py +++ b/scikits/learn/svm/sparse/classes.py @@ -142,7 +142,7 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, epsilon=0.1, probability=False, tol=1e-3): - SparseBaseLibSVM.__init__(self, 'epsilon_svr', kernel, + SparseBaseLibSVM.__init__(self, 'nu_svr', kernel, degree, gamma, coef0, tol, C, nu, epsilon, shrinking, probability) diff --git a/scikits/learn/svm/tests/test_svm.py b/scikits/learn/svm/tests/test_svm.py index ded6936b71f97..6f200757c3c89 100644 --- a/scikits/learn/svm/tests/test_svm.py +++ b/scikits/learn/svm/tests/test_svm.py @@ -140,29 +140,13 @@ def test_SVR(): Test Support Vector Regression """ - clf = svm.SVR(kernel='linear') - clf.fit(X, Y) - pred = clf.predict(T) - - assert_array_almost_equal(clf.dual_coef_, [[-0.1, 0.1]]) - assert_array_almost_equal(clf.coef_, [[0.2, 0.2]]) - assert_array_almost_equal(clf.support_vectors_, [[-1, -1], [1, 1]]) - assert_array_equal(clf.support_, [1, 3]) - assert_array_almost_equal(clf.intercept_, [1.5]) - assert_array_almost_equal(pred, [1.1, 2.3, 2.5]) - - # the same with kernel='rbf' - clf = svm.SVR(kernel='rbf') - clf.fit(X, Y) - pred = clf.predict(T) - - assert_array_almost_equal(clf.dual_coef_, - [[-0.014, -0.515, -0.013, 0.515, 0.013, 0.013]], - decimal=3) - assert_raises(NotImplementedError, lambda: clf.coef_) - assert_array_almost_equal(clf.support_vectors_, X) - assert_array_almost_equal(clf.intercept_, [1.49997261]) - assert_array_almost_equal(pred, [1.10001274, 1.86682485, 1.73300377]) + diabetes = datasets.load_diabetes() + for clf in (svm.NuSVR(kernel='linear', nu=.4), + svm.SVR(kernel='linear', C=10.), + svm.sparse.NuSVR(kernel='linear', nu=.4), + svm.sparse.SVR(kernel='linear', C=10.)): + clf.fit(diabetes.data, diabetes.target) + assert clf.score(diabetes.data, diabetes.target) > 0.02 def test_oneclass(): diff --git a/scikits/learn/tests/test_base.py b/scikits/learn/tests/test_base.py index 53a422f5cbc0c..4d1fb4ab572b3 100644 --- a/scikits/learn/tests/test_base.py +++ b/scikits/learn/tests/test_base.py @@ -93,9 +93,9 @@ def test_get_params(): assert_true('a__d' in test._get_params(deep=True)) assert_true('a__d' not in test._get_params(deep=False)) - test._set_params(a__d=2) + test.set_params(a__d=2) assert test.a.d == 2 - assert_raises(AssertionError, test._set_params, a__a=2) + assert_raises(AssertionError, test.set_params, a__a=2) def test_is_classifier(): diff --git a/scikits/learn/tests/test_cross_val.py b/scikits/learn/tests/test_cross_val.py index c849ae1328748..c130d4178825d 100644 --- a/scikits/learn/tests/test_cross_val.py +++ b/scikits/learn/tests/test_cross_val.py @@ -23,8 +23,7 @@ class MockClassifier(BaseEstimator): def __init__(self, a=0): self.a = a - def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): return self def predict(self, T): diff --git a/scikits/learn/tests/test_grid_search.py b/scikits/learn/tests/test_grid_search.py index e52e06ba2a082..83e18ab46766d 100644 --- a/scikits/learn/tests/test_grid_search.py +++ b/scikits/learn/tests/test_grid_search.py @@ -21,8 +21,7 @@ class MockClassifier(BaseEstimator): def __init__(self, foo_param=0): self.foo_param = foo_param 
- def fit(self, X, Y, **params): - self._set_params(**params) + def fit(self, X, Y): return self def predict(self, T): @@ -88,7 +87,7 @@ def test_grid_search_sparse_score_func(): clf = LinearSVC() cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, score_func=f1_score) # XXX: set refit to False due to a random bug when True (default) - cv.fit(X_[:180], y_[:180], refit=False) + cv.set_params(refit=False).fit(X_[:180], y_[:180]) y_pred = cv.predict(X_[180:]) C = cv.best_estimator.C @@ -96,7 +95,7 @@ def test_grid_search_sparse_score_func(): clf = SparseLinearSVC() cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, score_func=f1_score) # XXX: set refit to False due to a random bug when True (default) - cv.fit(X_[:180], y_[:180], refit=False) + cv.set_params(refit=False).fit(X_[:180], y_[:180]) y_pred2 = cv.predict(X_[180:]) C2 = cv.best_estimator.C diff --git a/scikits/learn/tests/test_naive_bayes.py b/scikits/learn/tests/test_naive_bayes.py index 81321931d1122..730b7e5866ab2 100644 --- a/scikits/learn/tests/test_naive_bayes.py +++ b/scikits/learn/tests/test_naive_bayes.py @@ -123,7 +123,8 @@ def test_discretenb_uniform_prior(): when fit_prior=False and class_prior=None""" for cls in [BernoulliNB, MultinomialNB]: - clf = cls(fit_prior=False) + clf = cls() + clf.set_params(fit_prior=False) clf.fit([[0], [0], [1]], [0, 0, 1]) prior = np.exp(clf.class_log_prior_) assert prior[0] == prior[1] diff --git a/scikits/learn/tests/test_neighbors.py b/scikits/learn/tests/test_neighbors.py index 14567e4dac5ad..66a459ab95757 100644 --- a/scikits/learn/tests/test_neighbors.py +++ b/scikits/learn/tests/test_neighbors.py @@ -101,16 +101,17 @@ def test_neighbors_iris(): """ for s in ('auto', 'ball_tree', 'brute', 'inplace'): - clf = neighbors.NeighborsClassifier() - clf.fit(iris.data, iris.target, n_neighbors=1, algorithm=s) + clf = neighbors.NeighborsClassifier(n_neighbors=1, algorithm=s) + clf.fit(iris.data, iris.target) assert_array_equal(clf.predict(iris.data), iris.target) - clf.fit(iris.data, iris.target, n_neighbors=9, algorithm=s) + clf.set_params(n_neighbors=9, algorithm=s) + clf.fit(iris.data, iris.target) assert np.mean(clf.predict(iris.data) == iris.target) > 0.95 for m in ('barycenter', 'mean'): - rgs = neighbors.NeighborsRegressor() - rgs.fit(iris.data, iris.target, mode=m, algorithm=s) + rgs = neighbors.NeighborsRegressor(mode=m) + rgs.fit(iris.data, iris.target) assert np.mean( rgs.predict(iris.data).round() == iris.target) > 0.95 @@ -125,8 +126,8 @@ def _test_neighbors_sparse_regression(): for sparse1 in SPARSE_TYPES: for m in ('barycenter', 'mean'): - rgs = neighbors.NeighborsRegressor() - rgs.fit(sparse1(iris.data), iris.target, mode=m) + rgs = neighbors.NeighborsRegressor(mode=m) + rgs.fit(sparse1(iris.data), iris.target) for sparse2 in SPARSE_OR_DENSE: data2 = sparse2(iris.data) assert np.mean(rgs.predict(data2).round() == iris.target) > 0.95 diff --git a/scikits/learn/tests/test_pipeline.py b/scikits/learn/tests/test_pipeline.py index 98911a44c0a6b..42e1f7f78fa1e 100644 --- a/scikits/learn/tests/test_pipeline.py +++ b/scikits/learn/tests/test_pipeline.py @@ -44,7 +44,7 @@ def test_pipeline_init(): dict(svc__a=None, svc__b=None, svc=clf)) # Check that params are set - pipe._set_params(svc__a=0.1) + pipe.set_params(svc__a=0.1) assert_equal(clf.a, 0.1) # Smoke test the repr: repr(pipe) @@ -55,13 +55,13 @@ def test_pipeline_init(): pipe = Pipeline([('anova', filter1), ('svc', clf)]) # Check that params are set - pipe._set_params(svc__C=0.1) + pipe.set_params(svc__C=0.1) assert_equal(clf.C, 0.1) # Smoke test the 
repr: repr(pipe) # Check that params are not set when naming them wrong - assert_raises(AssertionError, pipe._set_params, anova__C=0.1) + assert_raises(AssertionError, pipe.set_params, anova__C=0.1) # Test clone pipe2 = clone(pipe) diff --git a/scikits/learn/tests/test_pls.py b/scikits/learn/tests/test_pls.py index 75f9c4e506877..e66ea38dac226 100644 --- a/scikits/learn/tests/test_pls.py +++ b/scikits/learn/tests/test_pls.py @@ -14,10 +14,10 @@ def test_pls(): # =========================================================== # Compare 2 algo.: nipals vs. svd # ------------------------------ - pls_bynipals = pls.PLSCanonical() - pls_bynipals.fit(X, Y, n_components=n_components) - pls_bysvd = pls.PLSCanonical(algorithm="svd") - pls_bysvd.fit(X, Y, n_components=n_components) + pls_bynipals = pls.PLSCanonical(n_components=n_components) + pls_bynipals.fit(X, Y) + pls_bysvd = pls.PLSCanonical(algorithm="svd", n_components=n_components) + pls_bysvd.fit(X, Y) # check that the loading vectors are highly correlated assert_array_almost_equal( [np.abs(np.corrcoef(pls_bynipals.x_loadings_[:, k], @@ -35,8 +35,8 @@ def test_pls(): # Check PLS properties (with n_components=X.shape[1]) # --------------------------------------------------- - plsca = pls.PLSCanonical() - plsca.fit(X, Y, n_components=X.shape[1]) + plsca = pls.PLSCanonical(n_components=X.shape[1]) + plsca.fit(X, Y) T = plsca.x_scores_ P = plsca.x_loadings_ Wx = plsca.x_weights_ @@ -81,8 +81,8 @@ def check_ortho(M, err_msg): # "Non regression test" on canonical PLS # -------------------------------------- - pls_bynipals = pls.PLSCanonical() - pls_bynipals.fit(X, Y, n_components=n_components) + pls_bynipals = pls.PLSCanonical(n_components=n_components) + pls_bynipals.fit(X, Y) pls_ca = pls_bynipals x_loadings = np.array( @@ -111,8 +111,8 @@ def check_ortho(M, err_msg): # 2) Regression PLS (PLS2): "Non regression test" # =============================================== - pls2 = pls.PLSRegression() - pls2.fit(X, Y, n_components=n_components) + pls2 = pls.PLSRegression(n_components=n_components) + pls2.fit(X, Y) x_loadings = np.array( [[-0.66591531, -0.01976472], diff --git a/scikits/learn/utils/__init__.py b/scikits/learn/utils/__init__.py index ec31dc4e61da8..4921a34e80175 100644 --- a/scikits/learn/utils/__init__.py +++ b/scikits/learn/utils/__init__.py @@ -23,6 +23,41 @@ def safe_asanyarray(X, dtype=None, order=None): return X +def as_float_array(X, overwrite_X=False): + """ + Converts a numpy array to type np.float + + The new dtype will be float32 or np.float64, depending on the original type. 
+ The function can create a copy or modify the argument depending + on the argument overwrite_X + + WARNING: If X is not of type float, then a copy of X with the right type + will be returned + + Parameters + ---------- + X : array + + overwrite_X : bool, optional + If False, a copy of X will be created + + Returns + ------- + X : array + An array of type np.float + """ + if X.dtype in [np.float32, np.float64]: + if overwrite_X: + return X + else: + return X.copy() + if X.dtype == np.int32: + X = X.astype(np.float32) + else: + X = X.astype(np.float64) + return X + + def atleast2d_or_csr(X): """Like numpy.atleast_2d, but converts sparse matrices to CSR format""" X = X.tocsr() if sp.issparse(X) else np.atleast_2d(X) @@ -112,6 +147,15 @@ def check_arrays(*arrays, **options): return checked_arrays +def warn_if_not_float(X, estimator='This algorithm'): + """Warning utility function to check that data type is floating point""" + if not isinstance(estimator, basestring): + estimator = estimator.__class__.__name__ + if X.dtype.kind != 'f': + warnings.warn("%s assumes floating point values as input, " + "got %s" % (estimator, X.dtype)) + + class deprecated(object): """Decorator to mark a function or class as deprecated. diff --git a/scikits/learn/utils/tests/test___init__.py b/scikits/learn/utils/tests/test___init__.py new file mode 100644 index 0000000000000..fc0368ece2374 --- /dev/null +++ b/scikits/learn/utils/tests/test___init__.py @@ -0,0 +1,23 @@ +import numpy as np +from .. import as_float_array + + +def test_as_float_array(): + """ + Test function for as_float_array + """ + X = np.ones((3, 10), dtype=np.int32) + X = X + np.arange(10, dtype=np.int32) + # Checks that the return type is ok + X2 = as_float_array(X, overwrite_X=True) + np.testing.assert_equal(X2.dtype, np.float32) + # Another test + X = X.astype(np.int64) + X2 = as_float_array(X, overwrite_X=False) + # Checking that the array wasn't overwritten + assert as_float_array(X, False) is not X + # Checking that the new type is ok + np.testing.assert_equal(X2.dtype, np.float64) + # Here, X is of the right type, it shouldn't be modified + X = np.ones((3, 2), dtype=np.float32) + assert as_float_array(X, overwrite_X=True) is X
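A minimal sketch of the calling convention these hunks converge on, assuming the patched scikits.learn tree above: data-independent parameters are given to the constructor and adjusted later through set_params(), while fit() only receives data. Estimator and parameter names are taken from the ridge.py hunks; the toy arrays are purely illustrative:

    import numpy as np
    from scikits.learn.linear_model import Ridge

    X = np.random.randn(20, 5)
    y = np.random.randn(20)

    # parameters now live on the constructor instead of being passed as fit(**params)
    clf = Ridge(alpha=1.0, normalize=False, overwrite_X=False, tol=1e-3)
    clf.fit(X, y)

    # set_params() overrides them afterwards and returns the estimator,
    # so it chains with fit() the way the updated tests do
    clf.set_params(alpha=0.1, normalize=True).fit(X, y)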
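In the same spirit, a short sketch of the two helpers introduced above, Scaler.inverse_transform and utils.as_float_array, again assuming the patched tree; shapes and values are arbitrary:

    import numpy as np
    from scikits.learn.preprocessing import Scaler
    from scikits.learn.utils import as_float_array

    X = np.random.randn(4, 3)

    # inverse_transform undoes the centering and scaling learned by fit()
    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    X_back = scaler.inverse_transform(X_scaled)
    np.testing.assert_array_almost_equal(X_back, X)

    # as_float_array upgrades integer input to a floating point dtype
    # (int32 becomes float32, other non-float dtypes become float64);
    # overwrite_X=True skips the copy when X is already a float array
    X_int = np.arange(12, dtype=np.int32).reshape(4, 3)
    assert as_float_array(X_int).dtype == np.float32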