finalised TODO requests

PyDataBlog · PyDataBlog · commit 64e20f95679f · 2020-03-20T12:58:14.000+01:00
diff --git a/README.md b/README.md
@@ -25,23 +25,38 @@ ________________________________________________________________________________
 _________________________________________________________________________________________________________
 
 ### Motivation
+It's actually a funny story that led to the development of this package.
+What started off as a personal toy project trying to re-construct the K-Means algorithm in native Julia blew up into a heated discussion on the Julia Discourse forums after I asked for Julia optimization tips. Long story short, the Julia community is an amazing one! Andrey Oskin offered his help and together, we decided to push the speed limits of Julia with a parallel implementation of the most famous clustering algorithm. The initial results were mind-blowing, so we decided to tidy up the implementation and share it with the world.
 
+Say hello to our baby, `ParallelKMeans`!
 _________________________________________________________________________________________________________
 
 ### Installation
+You can grab the latest stable version of this package by simply running the command below in Julia.
+Don't forget to invoke Julia's package manager with `]` first.
 
-```bash
+```julia
+pkg> add ParallelKMeans
+```
+
+For the few (and select) brave ones, you can grab the current experimental features by adding the experimental branch to your development environment after invoking the package manager with `]`:
 
+```julia
+dev git@github.com:PyDataBlog/ParallelKMeans.jl.git
 ```
 
+Don't forget to check out the experimental branch, and you are good to go with bleeding-edge features and breakage!
+```bash
+git checkout experimental
+```
 _________________________________________________________________________________________________________
 
 ### Features
 
 - Lightening fast implementation of Kmeans clustering algorithm even on a single thread in native Julia.
 - Support for multi-theading implementation of Kmeans clustering algorithm.
 - Kmeans++ initialization for faster and better convergence.
-- Feature 4
+- Modified version of Elkan's Triangle inequality to speed up K-Means algorithm.
 
 _________________________________________________________________________________________________________
 
@@ -51,7 +66,7 @@ ________________________________________________________________________________
 
 ### Pending Features
 - [X] Implementation of Triangle inequality based on [Elkan C. (2003) "Using the Triangle Inequality to Accelerate
--Mean"](https://www.aaai.org/Papers/ICML/2003/ICML03-022.pdf)
+K-Means"](https://www.aaai.org/Papers/ICML/2003/ICML03-022.pdf)
 - [ ] Support for DataFrame inputs.
 - [ ] Refactoring and finalizaiton of API desgin.
 - [ ] GPU support.
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,4 +1,17 @@
-# ParallelKMeans.jl
+# ParallelKMeans.jl Documentation
+
+```@contents
+```
+
+## Installation
+
+
+## Features
+
+
+## How To Use
+
+
 
 ```@index
 ```
diff --git a/src/ParallelKMeans.jl b/src/ParallelKMeans.jl
@@ -84,7 +84,6 @@ end
 MultiThread() = MultiThread(Threads.nthreads())  # Uses all avaialble cores by default
 
 
-
 """
     colwise!(target, x, y, mode)
 
@@ -98,8 +97,6 @@ following modes supported:
 
 This dispatch handles the colwise calculation for single threads.
 """
-colwise!(target, x, y) = colwise!(target, x, y, SingleThread())
-
 function colwise!(target, x, y, mode::SingleThread)
     @inbounds for j in axes(x, 2)
         res = 0.0
@@ -111,6 +108,10 @@ function colwise!(target, x, y, mode::SingleThread)
 end
 
 
+# TODO: Why is this being dispatched here and not in a function?
+colwise!(target, x, y) = colwise!(target, x, y, SingleThread())
+
+
 """
     spliiter(n, k)
 
@@ -182,8 +183,7 @@ design matrix (X) and desired groups (k) that a user supplies.
 `k-means++` algorithm is used by default with the normal random selection
 of centroids from X used if any other string is attempted.
 
-A tuple representing the centroids, number of rows, & columns respecitively
-is returned.
+A named tuple representing centroids and indices respectively is returned.
 """
 function smart_init(X::Array{Float64, 2}, k::Int, mode::T = SingleThread();
         init::String="k-means++") where {T <: CalculationMode}
@@ -366,7 +366,9 @@ kmeans(alg::Lloyd, design_matrix::Array{Float64, 2}, k::Int, mode::T = SingleThr
 """
 function kmeans(alg::LightElkan, design_matrix::Array{Float64, 2}, k::Int, mode::T = SingleThread();
                 k_init::String = "k-means++", max_iters::Int = 300, tol = 1e-6, verbose::Bool = true, init = nothing) where {T <: CalculationMode}
+    # Get the dimensions of the design_matrix
     nrow, ncol = size(design_matrix)
+
     centroids = init == nothing ? smart_init(design_matrix, k, mode, init=k_init).centroids : deepcopy(init)
     new_centroids, centroids_cnt = create_containers(k, nrow, mode)
     # new_centroids = similar(centroids)
@@ -437,6 +439,7 @@ end
 """
 function update_centroids!(centroids, new_centroids, centroids_cnt, labels,
         design_matrix, mode::MultiThread)
+
     mode.n == 1 && return update_centroids!(centroids, new_centroids[1], centroids_cnt[1], labels,
             design_matrix, SingleThread())