Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MRG: Speed up euclidean_distances with Cython #1006

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit. Hold Shift + click to select a range.
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions sklearn/cluster/k_means_.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,7 +94,7 @@ def _k_init(X, n_clusters, n_local_trials=None, random_state=None,
if x_squared_norms is None:
x_squared_norms = _squared_norms(X)
closest_dist_sq = euclidean_distances(
centers[0], X, Y_norm_squared=x_squared_norms, squared=True)
centers[0], X, Y_norm_precomputed=x_squared_norms, squared=True)
current_pot = closest_dist_sq.sum()

# Pick the remaining n_clusters-1 points
Expand All @@ -106,7 +106,8 @@ def _k_init(X, n_clusters, n_local_trials=None, random_state=None,

# Compute distances to center candidates
distance_to_candidates = euclidean_distances(
X[candidate_ids], X, Y_norm_squared=x_squared_norms, squared=True)
X[candidate_ids], X, Y_norm_precomputed=x_squared_norms,
squared=True)

# Decide which candidate is the best
best_candidate = None
Expand Down Expand Up @@ -415,7 +416,8 @@ def _squared_norms(X):
def _labels_inertia_precompute_dense(X, x_squared_norms, centers):
n_samples = X.shape[0]
k = centers.shape[0]
distances = euclidean_distances(centers, X, x_squared_norms,
distances = euclidean_distances(centers, X,
Y_norm_precomputed=x_squared_norms,
squared=True)
labels = np.empty(n_samples, dtype=np.int)
labels.fill(-1)
Expand Down
9 changes: 5 additions & 4 deletions sklearn/manifold/mds.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,9 +87,10 @@ def _smacof_single(similarities, metric=True, n_components=2, init=None,
X = init

old_stress = None
dis = np.empty((n_samples, n_samples), dtype=np.float64)
for it in range(max_iter):
# Compute distance and monotonic regression
dis = euclidean_distances(X)
euclidean_distances(X, out=dis)

if metric:
disparities = similarities
Expand Down Expand Up @@ -120,16 +121,16 @@ def _smacof_single(similarities, metric=True, n_components=2, init=None,
B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
X = 1. / n_samples * np.dot(B, X)

dis = np.sqrt((X ** 2).sum(axis=1)).sum()
dis_sum = np.sqrt((X ** 2).sum(axis=1)).sum()
if verbose == 2:
print 'it: %d, stress %s' % (it, stress)
if old_stress is not None:
if(old_stress - stress / dis) < eps:
if(old_stress - stress / dis_sum) < eps:
if verbose:
print 'breaking at iteration %d with stress %s' % (it,
stress)
break
old_stress = stress / dis
old_stress = stress / dis_sum

return X, stress

Expand Down