@@ -131,38 +131,38 @@ function smart_init(X::Array{Float64, 2}, k::Int, mode::T = SingleThread();
131131 rand_idx = rand (1 : n_row)
132132 rand_indices[1 ] = rand_idx
133133 centroids[1 , :] .= X[rand_idx, :]
134+ centroids[k, :] .= 0.0
134135 distances = Array {Float64} (undef, n_row, 1 )
135136 new_distances = Array {Float64} (undef, n_row, 1 )
136137
138+ # TODO : Add `colwise` function (or use it from `Distances` package)
137139 # compute distances from the first centroid chosen to all the other data points
138140 first_centroid_matrix = convert (Matrix, centroids[1 , :]' )
139141
140142 # flatten distances
141- # distances = vec(pairwise(SqEuclidean(), X, first_centroid_matrix, dims = 1))
142143 pairwise! (distances, X, first_centroid_matrix, mode)
144+ distances[rand_idx] = 0.0
143145
144146 for i = 2 : k
145147 # choose the next centroid, the probability for each data point to be chosen
146148 # is directly proportional to its squared distance from the nearest centroid
147- r_idx = sample (1 : n_row, ProbabilityWeights ( vec (distances) ))
149+ r_idx = wsample (1 : n_row, vec (distances))
148150 rand_indices[i] = r_idx
149151 centroids[i, :] .= X[r_idx, :]
150152
151- # Ignore setting the last centroid to help the separation of centroids
152- if i == (k- 1 )
153- break
154- end
153+ # no need for final distance update
154+ i == k && break
155155
156156 # compute distances from the centroids to all data points
157157 current_centroid_matrix = convert (Matrix, centroids[i, :]' )
158158 # new_distances = vec(pairwise(SqEuclidean(), X, current_centroid_matrix, dims = 1))
159159 pairwise! (new_distances, X, first_centroid_matrix, mode)
160160
161161 # and update each squared distance to the minimum distance over all chosen centroids
162- # distances = minimum([distances, new_distances])
163162 for i in 1 : n_row
164163 distances[i, 1 ] = distances[i, 1 ] < new_distances[i, 1 ] ? distances[i, 1 ] : new_distances[i, 1 ]
165164 end
165+ distances[r_idx, 1 ] = 0.0
166166 end
167167
168168 else
0 commit comments