From 48755cb573f368e1509fb0450bb2d2a956334f44 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 22 Oct 2025 08:36:30 +1300 Subject: [PATCH 1/8] bump StatsBase="0.32, 0.33, 0.34" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bd48f3c..8446f3f 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ CategoricalDistributions = "0.1.9" Distances = "0.9,0.10" MLJModelInterface = "1.4" MultivariateStats = "0.10" -StatsBase = "0.32, 0.33" +StatsBase = "0.32, 0.33, 0.34" julia = "1.6" [extras] From 1f3de3c2df80d45915b532de8649ccad6c5edd95 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 22 Oct 2025 08:37:38 +1300 Subject: [PATCH 2/8] bump 0.5.3 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8446f3f..86e80ae 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJMultivariateStatsInterface" uuid = "1b6a4a23-ba22-4f51-9698-8599985d3728" authors = ["Anthony D. Blaom ", "Thibaut Lienart ", "Okon Samuel "] -version = "0.5.2" +version = "0.5.3" [deps] CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e" From 51be8018abd0152b256bbad464ebfd805bb1ef6b Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 22 Oct 2025 08:39:37 +1300 Subject: [PATCH 3/8] bump CategoricalDistributions = "0.2" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 86e80ae..781e2e5 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -CategoricalDistributions = "0.1.9" +CategoricalDistributions = "0.2" Distances = "0.9,0.10" MLJModelInterface = "1.4" MultivariateStats = "0.10" From a279a5bfbababddf56c9e1c38e3dc32d8dc1f202 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 22 Oct 2025 08:40:55 +1300 Subject: [PATCH 4/8] whitespace --- src/utils.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 619be15..7d15b96 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,14 +1,14 @@ # internal method essentially the same as Base.replace!(y, (z .=> r)...) # but more efficient. -# Similar to the behaviour of `Base.replace!` if `z` contain repetions of values in +# Similar to the behaviour of `Base.replace!` if `z` contain repetions of values in # `y` then only the transformation corresponding to the first occurence is performed # i.e `_replace!([1,5,3], [1,4], 4:5)` would return `[4,5,3]` rather than `[5,5,3]` # (which replaces `1=>4` and then `4=>5`) function _replace!(y::AbstractVector, z::AbstractVector, r::AbstractVector) - length(r) == length(z) || + length(r) == length(z) || throw(DimensionMismatch("`z` and `r` has to be of the same length")) @inbounds for i in eachindex(y) - for j in eachindex(z) + for j in eachindex(z) isequal(z[j], y[i]) && (y[i] = r[j]; break) end end @@ -35,7 +35,7 @@ Implementation taken from NNlib.jl. """ function softmax!(X::AbstractMatrix{<:Real}) max_ = maximum(X, dims=2) - X .= exp.(X .- max_) + X .= exp.(X .- max_) X ./= sum(X, dims=2) - return X + return X end From 1c0b061559519af133154ce0e2fe04c1cbe5d7a2 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 22 Oct 2025 09:54:51 +1300 Subject: [PATCH 5/8] add StatisticalMeasures as test dep --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 781e2e5..7f7655f 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,8 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +StatisticalMeasures = "a19d573c-0a75-4610-95b3-7071388c7541" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Dates", "MLJBase", "Random", "StableRNGs", "Test"] +test = ["Dates", "MLJBase", "Random", "StableRNGs", "StatisticalMeasures", "Test"] From 95af562e74cb4530cecd4e7659b23698f5e58be8 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 22 Oct 2025 09:55:29 +1300 Subject: [PATCH 6/8] adapt to changes in levels behaviour in CategoricalDistributions --- src/models/discriminant_analysis.jl | 23 +++++++++++++---------- test/models/discriminant_analysis.jl | 20 ++++++++++---------- test/runtests.jl | 2 ++ 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/models/discriminant_analysis.jl b/src/models/discriminant_analysis.jl index 92d041f..13e608f 100644 --- a/src/models/discriminant_analysis.jl +++ b/src/models/discriminant_analysis.jl @@ -20,7 +20,8 @@ const ERR_LONE_TARGET_CLASS = ArgumentError( ) function _check_lda_data(model, X, y) - pool = MMI.classes(y[1]) # Class list containing entries in pool of `y`. + pool = CategoricalDistributions.levels(y[1]) # Class list containing entries in pool + # of `y`. classes_seen = unique(y) # Class list of actual entries in seen in `y`. nc = length(classes_seen) # Number of actual classes seen in `y`. 
@@ -109,7 +110,7 @@ function MMI.predict(m::LDA, (core_res, classes_seen, pool), Xnew) Pr .*= -1 # apply a softmax transformation softmax!(Pr) - return MMI.UnivariateFinite(classes_seen, Pr, pool=pool) + return MMI.UnivariateFinite(classes_seen, Pr) end metadata_model( @@ -160,7 +161,7 @@ function _check_prob01(priors) end @inline function _check_lda_priors(priors::UnivariateFinite, classes_seen, pool) - if MMI.classes(priors) != pool + if CategoricalDistributions.levels(priors) != pool throw( ArgumentError( "UnivariateFinite `priors` must have common pool with training target." @@ -236,7 +237,7 @@ function MMI.fitted_params(::BayesianLDA, (core_res, classes_seen, pool, priors return ( classes = classes_seen, projection_matrix=MS.projection(core_res), - priors=MMI.UnivariateFinite(classes_seen, priors, pool=pool) + priors=MMI.UnivariateFinite(classes_seen, priors) ) end @@ -261,7 +262,7 @@ function MMI.predict(m::BayesianLDA, (core_res, classes_seen, pool, priors, n), # apply a softmax transformation to convert Pr to a probability matrix softmax!(Pr) - return MMI.UnivariateFinite(classes_seen, Pr, pool=pool) + return MMI.UnivariateFinite(classes_seen, Pr) end function MMI.transform(m::T, (core_res, ), X) where T<:Union{LDA, BayesianLDA} @@ -353,7 +354,7 @@ function MMI.predict(m::SubspaceLDA, (core_res, outdim, classes_seen, pool), Xne Pr .*= -1 # apply a softmax transformation softmax!(Pr) - return MMI.UnivariateFinite(classes_seen, Pr, pool=pool) + return MMI.UnivariateFinite(classes_seen, Pr) end metadata_model( @@ -430,7 +431,7 @@ function MMI.fitted_params( return ( classes = classes_seen, projection_matrix=core_res.projw * view(core_res.projLDA, :, 1:outdim), - priors=MMI.UnivariateFinite(classes_seen, priors, pool=pool) + priors=MMI.UnivariateFinite(classes_seen, priors) ) end @@ -470,7 +471,7 @@ function MMI.predict( # apply a softmax transformation to convert Pr to a probability matrix softmax!(Pr) - return MMI.UnivariateFinite(classes_seen, Pr, pool=pool) 
+ return MMI.UnivariateFinite(classes_seen, Pr) end function MMI.transform( @@ -724,7 +725,8 @@ The fields of `fitted_params(mach)` are: section below). - `priors`: The class priors for classification. As inferred from training target `y`, if - not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`. + not user-specified. A `UnivariateFinite` object with levels (classes) consistent with + `levels(y)`. # Report @@ -954,7 +956,8 @@ The fields of `fitted_params(mach)` are: section below). - `priors`: The class priors for classification. As inferred from training target `y`, if - not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`. + not user-specified. A `UnivariateFinite` object with levels (classes) consistent with + `levels(y)`. # Report diff --git a/test/models/discriminant_analysis.jl b/test/models/discriminant_analysis.jl index 7c689e0..9c504aa 100644 --- a/test/models/discriminant_analysis.jl +++ b/test/models/discriminant_analysis.jl @@ -73,15 +73,15 @@ end ytest = selectrows(y, test) BLDA_model = BayesianLDA(regcoef=0) - + ## Check model `fit` fitresult, cache, report = fit(BLDA_model, 1, Xtrain, ytrain) classes_seen, projection_matrix, priors = fitted_params(BLDA_model, fitresult) - @test classes(priors) == classes(y) + @test levels(priors) == levels(y) @test pdf.(priors, support(priors)) == [491/998, 507/998] @test classes_seen == ["Up", "Down"] @test round.((report.class_means)', sigdigits = 3) == [-0.0395 -0.0313; 0.0428 0.0339] #[0.0428 0.0339; -0.0395 -0.0313] - + ## Check model `predict` preds = predict(BLDA_model, fitresult, Xtest) mce = cross_entropy(preds, ytest) |> mean @@ -94,7 +94,7 @@ end fitresult1, cache1, report1 = fit(BLDA_model1, 1, Xtrain, ytrain) classes_seen1, projection_matrix1, priors1 = fitted_params(BLDA_model1, fitresult1) BLDA_model2 = BayesianLDA( - regcoef=0, priors=UnivariateFinite(classes(ytrain), [491/998, 507/998]) + regcoef=0, 
priors=UnivariateFinite(levels(ytrain), [491/998, 507/998]) ) fitresult2, cache2, report2 = fit(BLDA_model2, 1, Xtrain, ytrain) classes_seen2, projection_matrix2, priors2 = fitted_params(BLDA_model2, fitresult2) @@ -156,7 +156,7 @@ end LDA_model1, fitresult1 ) LDA_model2 = BayesianSubspaceLDA( - priors=UnivariateFinite(classes(y), [1/3, 1/3, 1/3]) + priors=UnivariateFinite(levels(y), [1/3, 1/3, 1/3]) ) fitresult2, cache2, report2 = fit(LDA_model2, 1, X, y) classes_seen2, projection_matrix2, priors2 = fitted_params( @@ -231,24 +231,24 @@ end y2 = y[[1,2,1,2]] @test_throws ArgumentError fit(model, 1, X, y2) - ## Check to make sure error is thrown if UnivariateFinite `priors` doesn't have + ## Check to make sure error is thrown if UnivariateFinite `priors` doesn't have ## common pool with target vector used in training. model = BayesianLDA(priors=UnivariateFinite([0.1, 0.5, 0.4], pool=missing)) @test_throws ArgumentError fit(model, 1, X, y) - ## Check to make sure error is thrown if keys used in `priors` dictionary are in pool + ## Check to make sure error is thrown if keys used in `priors` dictionary aren't in pool ## of training target used in training. model = BayesianLDA(priors=Dict("apples" => 0.1, "oranges"=>0.5, "bannana"=>0.4)) @test_throws ArgumentError fit(model, 1, X, y) ## Check to make sure error is thrown if sum(`priors`) isn't approximately equal to 1. - model = BayesianLDA(priors=UnivariateFinite(classes(y), [0.1, 0.5, 0.4, 0.2])) + model = BayesianLDA(priors=UnivariateFinite(levels(y), [0.1, 0.5, 0.4, 0.2])) @test_throws ArgumentError fit(model, 1, X, y) ## Check to make sure error is thrown if `priors .< 0` or `priors .> 1`.
- model = BayesianLDA(priors=Dict(classes(y) .=> [-0.1, 0.0, 1.0, 0.1])) + model = BayesianLDA(priors=Dict(levels(y) .=> [-0.1, 0.0, 1.0, 0.1])) @test_throws ArgumentError fit(model, 1, X, y) - model = BayesianLDA(priors=Dict(classes(y) .=> [1.1, 0.0, 0.0, -0.1])) + model = BayesianLDA(priors=Dict(levels(y) .=> [1.1, 0.0, 0.0, -0.1])) @test_throws ArgumentError fit(model, 1, X, y) X2 = (x=rand(5),) diff --git a/test/runtests.jl b/test/runtests.jl index 5a1097f..933491b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,9 +2,11 @@ import Dates import MLJMultivariateStatsInterface: _replace! import MultivariateStats import Random +import CategoricalDistributions.levels using LinearAlgebra using MLJBase +using StatisticalMeasures using MLJMultivariateStatsInterface using StableRNGs using Test From 263bf6195251e31db5b35eefe540150b1a042e3f Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 22 Oct 2025 09:58:30 +1300 Subject: [PATCH 7/8] update to julia-actions/cache@v2 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b5d87f..6dbd159 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: julia-actions/cache@v2 env: cache-name: cache-artifacts with: From 4c81c1a16f982c9ecac8116e8c89198f183963da Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 22 Oct 2025 10:00:56 +1300 Subject: [PATCH 8/8] drop support for julia < 1.10; bump version 0.6.0 --- .github/workflows/ci.yml | 2 +- Project.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6dbd159..621c572 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: version: - - '1.6' + - '1.10' - '1' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index 7f7655f..f229786 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJMultivariateStatsInterface" uuid = "1b6a4a23-ba22-4f51-9698-8599985d3728" authors = ["Anthony D. Blaom ", "Thibaut Lienart ", "Okon Samuel "] -version = "0.5.3" +version = "0.6.0" [deps] CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e" @@ -17,7 +17,7 @@ Distances = "0.9,0.10" MLJModelInterface = "1.4" MultivariateStats = "0.10" StatsBase = "0.32, 0.33, 0.34" -julia = "1.6" +julia = "1.10" [extras] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"