diff --git a/Project.toml b/Project.toml index d55346aca..c8317a599 100644 --- a/Project.toml +++ b/Project.toml @@ -13,8 +13,10 @@ LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -46,9 +48,12 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" test = ["Test"] [weakdeps] +BlackBoxOptim = "a134a8b2-14d6-55f6-9291-3336d3ab0209" NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" +Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" [extensions] SEMNLOptExt = "NLopt" SEMProximalOptExt = "ProximalAlgorithms" +SEMBlackBoxOptimExt = ["BlackBoxOptim", "Optimisers"] diff --git a/docs/src/tutorials/concept.md b/docs/src/tutorials/concept.md index b8d094abc..755642e3b 100644 --- a/docs/src/tutorials/concept.md +++ b/docs/src/tutorials/concept.md @@ -49,8 +49,8 @@ Available loss functions are - [`SemRidge`](@ref): ridge regularization ## The optimizer part aka `SemOptimizer` -The optimizer part of a model connects to the numerical optimization backend used to fit the model. -It can be used to control options like the optimization algorithm, linesearch, stopping criteria, etc. +The optimizer part of a model connects to the numerical optimization backend used to fit the model. +It can be used to control options like the optimization algorithm, linesearch, stopping criteria, etc. There are currently two available backends, [`SemOptimizerOptim`](@ref) connecting to the [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl) backend, and [`SemOptimizerNLopt`](@ref) connecting to the [NLopt.jl](https://github.com/JuliaOpt/NLopt.jl) backend. For more information about the available options see also the tutorials about [Using Optim.jl](@ref) and [Using NLopt.jl](@ref), as well as [Constrained optimization](@ref). 
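For instance, a minimal sketch of picking a backend (keyword names follow the tutorials linked above and may differ between package versions):

```julia
using StructuralEquationModels, Optim

# Optim.jl backend with an explicit algorithm choice
my_optimizer = SemOptimizerOptim(algorithm = LBFGS())

# `partable` and `data` as constructed in the preceding tutorial sections
model = Sem(specification = partable, data = data, optimizer = my_optimizer)
model_fit = sem_fit(model)
```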
diff --git a/docs/src/tutorials/construction/build_by_parts.md b/docs/src/tutorials/construction/build_by_parts.md index 071750a8c..82513ec9c 100644 --- a/docs/src/tutorials/construction/build_by_parts.md +++ b/docs/src/tutorials/construction/build_by_parts.md @@ -40,7 +40,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` diff --git a/docs/src/tutorials/fitting/fitting.md b/docs/src/tutorials/fitting/fitting.md index b534ad754..1bafc4bd4 100644 --- a/docs/src/tutorials/fitting/fitting.md +++ b/docs/src/tutorials/fitting/fitting.md @@ -7,19 +7,19 @@ model_fit = sem_fit(model) # output -Fitted Structural Equation Model -=============================================== ---------------------- Model ------------------- +Fitted Structural Equation Model +=============================================== +--------------------- Model ------------------- -Structural Equation Model -- Loss Functions +Structural Equation Model +- Loss Functions SemML -- Fields - observed: SemObservedData - implied: RAM - optimizer: SemOptimizerOptim +- Fields + observed: SemObservedData + implied: RAM + optimizer: SemOptimizerOptim -------------- Optimization result ------------- +------------- Optimization result ------------- * Status: success diff --git a/docs/src/tutorials/meanstructure.md b/docs/src/tutorials/meanstructure.md index 692f6cebc..3c669a173 100644 --- a/docs/src/tutorials/meanstructure.md +++ b/docs/src/tutorials/meanstructure.md @@ -40,7 +40,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` @@ -78,7 +78,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md index 02d3b3bac..40199ae06 100644 --- a/docs/src/tutorials/regularization/regularization.md +++ b/docs/src/tutorials/regularization/regularization.md @@ -2,7 +2,7 @@ ## Setup -For ridge regularization, you can simply use `SemRidge` as an additional loss function +For ridge regularization, you can simply use `SemRidge` as an additional loss function (for example, a model with the loss functions `SemML` and `SemRidge` corresponds to ridge-regularized maximum likelihood estimation). For lasso, elastic net and (far) beyond, we provide the `ProximalSEM` package. You can install it and load it alongside `StructuralEquationModels`: @@ -39,7 +39,7 @@ using ProximalOperators ## `SemOptimizerProximal` `ProximalSEM` provides a new "building block" for the optimizer part of a model, called `SemOptimizerProximal`. -It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms. +It connects our package to the [`ProximalAlgorithms.jl`](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) optimization backend, providing so-called proximal optimization algorithms. Those can handle, amongst other things, various forms of regularization. 
It can be used as @@ -87,7 +87,7 @@ end partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars ) diff --git a/docs/src/tutorials/specification/graph_interface.md b/docs/src/tutorials/specification/graph_interface.md index 609c844c3..ad46b7678 100644 --- a/docs/src/tutorials/specification/graph_interface.md +++ b/docs/src/tutorials/specification/graph_interface.md @@ -1,6 +1,6 @@ # Graph interface -## Workflow +## Workflow As discussed before, when using the graph interface, you can specify your model as a graph ```julia @@ -17,7 +17,7 @@ latent_vars = ... partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) model = Sem( @@ -32,7 +32,7 @@ In general, there are two different types of parameters: **directed** and **indi We allow multiple variables on both sides of an arrow, for example `x → [y z]` or `[a b] → [c d]`. The latter specifies element-wise edges; that is, it is the same as `a → c; b → d`. If you want edges corresponding to the cross-product, we have the double-lined arrow `[a b] ⇒ [c d]`, corresponding to `a → c; a → d; b → c; b → d`. The undirected arrows ↔ (element-wise) and ⇔ (cross-product) behave the same way. !!! note "Unicode symbols in julia" - The `→` symbol is a unicode symbol allowed in julia (among many others; see this [list](https://docs.julialang.org/en/v1/manual/unicode-input/)). You can enter it in the julia REPL or the vscode IDE by typing `\to` followed by hitting `tab`. Similarly, - `←` = `\leftarrow`, - `↔` = `\leftrightarrow`, - `⇒` = `\Rightarrow`, @@ -54,7 +54,7 @@ graph = @StenoGraph begin ξ₃ ↔ fixed(1.0)*ξ₃ end ``` -would +would - fix the directed effects from `ξ₁` to `x1` and from `ξ₂` to `x2` to `1` - leave the directed effect from `ξ₃` to `x7` free but instead restrict the variance of `ξ₃` to `1` - give the effect from `ξ₁` to `x3` the label `:a` (which can be convenient later if you want to retrieve information from your model about that specific parameter) @@ -66,7 +66,7 @@ As you saw above and in the [A first model](@ref) example, the graph object need ```julia partable = ParameterTable( graph, - latent_vars = latent_vars, + latent_vars = latent_vars, observed_vars = observed_vars) ``` @@ -85,7 +85,7 @@ The variable names (`:x1`) have to be symbols, the syntax `:something` creates a _(latent_vars) ⇔ _(latent_vars) end ``` -creates undirected effects coresponding to +creates undirected effects corresponding to 1. the variances of all observed variables and 2. the variances plus covariances of all latent variables So if you want to work with a subset of variables, simply specify a vector of symbols `somevars = [...]`, and inside the graph specification, refer to them as `_(somevars)`.
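To illustrate the last point, a small sketch of the `_()` interpolation pattern just described (the variable names are made up):

```julia
somevars = [:x1, :x2, :x3]

graph = @StenoGraph begin
    # element-wise directed edges: ξ₁ → x1, ξ₁ → x2, ξ₁ → x3
    ξ₁ → _(somevars)
    # element-wise undirected edges, i.e. the variances of the listed variables
    _(somevars) ↔ _(somevars)
end
```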
diff --git a/docs/src/tutorials/specification/parameter_table.md b/docs/src/tutorials/specification/parameter_table.md index c328a3b1a..62c45c9a4 100644 --- a/docs/src/tutorials/specification/parameter_table.md +++ b/docs/src/tutorials/specification/parameter_table.md @@ -5,5 +5,5 @@ As lavaan also uses parameter tables to store model specifications, we are worki ## Convert from and to RAMMatrices -To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(rammatrices)`. +To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(rammatrices)`. To convert an object of type `ParameterTable` to RAMMatrices, you can use `ram_matrices = RAMMatrices(partable)`. \ No newline at end of file diff --git a/docs/src/tutorials/specification/ram_matrices.md b/docs/src/tutorials/specification/ram_matrices.md index 5f0757238..ee8f5bb56 100644 --- a/docs/src/tutorials/specification/ram_matrices.md +++ b/docs/src/tutorials/specification/ram_matrices.md @@ -1,6 +1,6 @@ # RAMMatrices interface -Models can also be specified by an object of type `RAMMatrices`. +Models can also be specified by an object of type `RAMMatrices`. The RAM (reticular action model) specification corresponds to three matrices; the `A` matrix containing all directed parameters, the `S` matrix containing all undirected parameters, and the `F` matrix filtering out latent variables from the model implied covariance. The model implied covariance matrix for the observed variables of a SEM is then computed as @@ -56,9 +56,9 @@ A =[0 0 0 0 0 0 0 0 0 0 0 1.0 0 0 θ = Symbol.(:θ, 1:31) spec = RAMMatrices(; - A = A, - S = S, - F = F, + A = A, + S = S, + F = F, params = θ, colnames = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) @@ -71,9 +71,9 @@ model = Sem( Let's look at this step by step: -First, we specify the `A`, `S` and `F`-Matrices. -For a free parameter, we write a `Symbol` like `:θ1` (or any other symbol we like) to the corresponding place in the respective matrix, to constrain parameters to be equal we just use the same `Symbol` in the respective entries. -To fix a parameter (as in the `A`-Matrix above), we just write down the number we want to fix it to. +First, we specify the `A`, `S` and `F`-Matrices. +For a free parameter, we write a `Symbol` like `:θ1` (or any other symbol we like) to the corresponding place in the respective matrix, to constrain parameters to be equal we just use the same `Symbol` in the respective entries. +To fix a parameter (as in the `A`-Matrix above), we just write down the number we want to fix it to. All other entries are 0. Second, we specify a vector of symbols containing our parameters: @@ -82,14 +82,14 @@ Second, we specify a vector of symbols containing our parameters: θ = Symbol.(:θ, 1:31) ``` -Third, we construct an object of type `RAMMatrices`, passing our matrices and parameters, as well as the column names of our matrices. +Third, we construct an object of type `RAMMatrices`, passing our matrices and parameters, as well as the column names of our matrices. Those are quite important, as they will be used to rearrange your data to match it to your `RAMMatrices` specification. 
```julia spec = RAMMatrices(; - A = A, - S = S, - F = F, + A = A, + S = S, + F = F, params = θ, colnames = [:x1, :x2, :x3, :y1, :y2, :y3, :y4, :y5, :y6, :y7, :y8, :ind60, :dem60, :dem65] ) @@ -109,7 +109,7 @@ According to the RAM, model implied mean values of the observed variables are co ```math \mu = F(I-A)^{-1}M ``` -where `M` is a vector of mean parameters. To estimate the means of the observed variables in our example (and set the latent means to `0`), we would specify the model just as before but add +where `M` is a vector of mean parameters. To estimate the means of the observed variables in our example (and set the latent means to `0`), we would specify the model just as before but add ```julia ... @@ -128,5 +128,5 @@ spec = RAMMatrices(; ## Convert from and to ParameterTables -To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(ram_matrices)`. +To convert a RAMMatrices object to a ParameterTable, simply use `partable = ParameterTable(ram_matrices)`. To convert an object of type `ParameterTable` to RAMMatrices, you can use `ram_matrices = RAMMatrices(partable)`. \ No newline at end of file diff --git a/ext/SEMBlackBoxOptimExt/AdamMutation.jl b/ext/SEMBlackBoxOptimExt/AdamMutation.jl new file mode 100644 index 000000000..4f1a80e3b --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/AdamMutation.jl @@ -0,0 +1,49 @@ +# mutate by moving in the gradient direction +mutable struct AdamMutation{M <: AbstractSem, O, S} <: MutationOperator + model::M + optim::O + opt_state::S + params_fraction::Float64 + + function AdamMutation(model::AbstractSem, params::AbstractDict) + optim = RAdam(params[:AdamMutation_eta], params[:AdamMutation_beta]) + params_fraction = params[:AdamMutation_params_fraction] + opt_state = Optimisers.init(optim, Vector{Float64}(undef, nparams(model))) + + new{typeof(model), typeof(optim), typeof(opt_state)}( + model, + optim, + opt_state, + params_fraction, + ) + end +end + +Base.show(io::IO, op::AdamMutation) = + print(io, "AdamMutation(", op.optim, " state[3]=", op.opt_state[3], ")") + +""" +Default parameters for `AdamMutation`. 
+""" +const AdamMutation_DefaultOptions = ParamsDict( + :AdamMutation_eta => 1E-1, + :AdamMutation_beta => (0.99, 0.999), + :AdamMutation_params_fraction => 0.25, +) + +function BlackBoxOptim.apply!(m::AdamMutation, v::AbstractVector{<:Real}, target_index::Int) + grad = similar(v) + obj = SEM.evaluate!(0.0, grad, nothing, m.model, v) + @inbounds for i in eachindex(grad) + (rand() > m.params_fraction) && (grad[i] = 0.0) + end + + m.opt_state, dv = Optimisers.apply!(m.optim, m.opt_state, v, grad) + if (m.opt_state[3][1] <= 1E-20) || !isfinite(obj) || any(!isfinite, dv) + m.opt_state = Optimisers.init(m.optim, v) + else + v .-= dv + end + + return v +end diff --git a/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl b/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl new file mode 100644 index 000000000..f9d67e069 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl @@ -0,0 +1,89 @@ +############################################################################################ +### connect to BlackBoxOptim.jl as backend +############################################################################################ + +""" +""" +struct SemOptimizerBlackBoxOptim <: SemOptimizer{:BlackBoxOptim} + lower_bound::Float64 # default lower bound + variance_lower_bound::Float64 # default variance lower bound + lower_bounds::Union{Dict{Symbol, Float64}, Nothing} + + upper_bound::Float64 # default upper bound + upper_bounds::Union{Dict{Symbol, Float64}, Nothing} +end + +function SemOptimizerBlackBoxOptim(; + lower_bound::Float64 = -1000.0, + lower_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + variance_lower_bound::Float64 = 0.001, + upper_bound::Float64 = 1000.0, + upper_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + kwargs..., +) + if variance_lower_bound < 0.0 + throw(ArgumentError("variance_lower_bound must be non-negative")) + end + return SemOptimizerBlackBoxOptim( + lower_bound, + variance_lower_bound, + lower_bounds, + upper_bound, + upper_bounds, + ) +end + +SEM.SemOptimizer{:BlackBoxOptim}(args...; kwargs...) = + SemOptimizerBlackBoxOptim(args...; kwargs...) + +SEM.algorithm(optimizer::SemOptimizerBlackBoxOptim) = optimizer.algorithm +SEM.options(optimizer::SemOptimizerBlackBoxOptim) = optimizer.options + +struct SemModelBlackBoxOptimProblem{M <: AbstractSem} <: + OptimizationProblem{ScalarFitnessScheme{true}} + model::M + fitness_scheme::ScalarFitnessScheme{true} + search_space::ContinuousRectSearchSpace +end + +function BlackBoxOptim.search_space(model::AbstractSem) + optim = model.optimizer::SemOptimizerBlackBoxOptim + varparams = Set(SEM.variance_params(model.implied.ram_matrices)) + return ContinuousRectSearchSpace( + [ + begin + def = in(p, varparams) ? optim.variance_lower_bound : optim.lower_bound + isnothing(optim.lower_bounds) ? def : get(optim.lower_bounds, p, def) + end for p in SEM.params(model) + ], + [ + begin + def = optim.upper_bound + isnothing(optim.upper_bounds) ? 
def : get(optim.upper_bounds, p, def) + end for p in SEM.params(model) + ], + ) +end + +function SemModelBlackBoxOptimProblem( + model::AbstractSem, + optimizer::SemOptimizerBlackBoxOptim, +) + SemModelBlackBoxOptimProblem(model, ScalarFitnessScheme{true}(), search_space(model)) +end + +BlackBoxOptim.fitness(params::AbstractVector, wrapper::SemModelBlackBoxOptimProblem) = + return SEM.evaluate!(0.0, nothing, nothing, wrapper.model, params) + +# sem_fit method +function SEM.sem_fit( + optimizer::SemOptimizerBlackBoxOptim, + model::AbstractSem, + start_params::AbstractVector; + MaxSteps::Integer = 50000, + kwargs..., +) + problem = SemModelBlackBoxOptimProblem(model, optimizer) + res = bboptimize(problem; MaxSteps, kwargs...) + return SemFit(best_fitness(res), best_candidate(res), nothing, model, res) +end diff --git a/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl b/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl new file mode 100644 index 000000000..75080541a --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl @@ -0,0 +1,196 @@ +""" +Base class for factories of optimizers for a specific problem. +""" +abstract type OptimizerFactory{P <: OptimizationProblem} end + +problem(factory::OptimizerFactory) = factory.problem + +const OptController_DefaultParameters = ParamsDict( + :MaxTime => 60.0, + :MaxSteps => 10^8, + :TraceMode => :compact, + :TraceInterval => 5.0, + :RecoverResults => false, + :SaveTrace => false, +) + +function generate_opt_controller(alg::Optimizer, optim_factory::OptimizerFactory, params) + return BlackBoxOptim.OptController( + alg, + problem(optim_factory), + BlackBoxOptim.chain( + BlackBoxOptim.DefaultParameters, + OptController_DefaultParameters, + params, + ), + ) +end + +function check_population( + factory::OptimizerFactory, + popmatrix::BlackBoxOptim.PopulationMatrix, +) + ssp = factory |> problem |> search_space + for i in 1:popsize(popmatrix) + @assert popmatrix[:, i] ∈ ssp "Individual $i is out of space: $(popmatrix[:,i])" # fitness: $(fitness(population, i))" + end +end + +initial_search_space(factory::OptimizerFactory, id::Int) = search_space(factory.problem) + +function initial_population_matrix(factory::OptimizerFactory, id::Int) + #@info "Standard initial_population_matrix()" + ini_ss = initial_search_space(factory, id) + if !isempty(factory.initial_population) + numdims(factory.initial_population) == numdims(factory.problem) || throw( + DimensionMismatch( + "Dimensions of :Population ($(numdims(factory.initial_population))) " * + "are different from the problem dimensions ($(numdims(factory.problem)))", + ), + ) + res = factory.initial_population[ + :, + StatsBase.sample( + 1:popsize(factory.initial_population), + factory.population_size, + ), + ] + else + res = rand_individuals(ini_ss, factory.population_size, method = :latin_hypercube) + end + prj = RandomBound(ini_ss) + if size(res, 2) > 1 + apply!(prj, view(res, :, 1), SEM.start_fabin3(factory.problem.model)) + end + if size(res, 2) > 2 + apply!(prj, view(res, :, 2), SEM.start_simple(factory.problem.model)) + end + return res +end + +# convert individuals in the archive into population matrix +population_matrix(archive::Any) = population_matrix!( + Matrix{Float64}(undef, length(BlackBoxOptim.params(first(archive))), length(archive)), + archive, +) + +function population_matrix!(pop::AbstractMatrix{<:Real}, archive::Any) + npars = length(BlackBoxOptim.params(first(archive))) + size(pop, 1) == npars || throw( + DimensionMismatch( + "Matrix rows count ($(size(pop, 1))) doesn't match the number of problem 
dimensions ($(npars))", + ), + ) + @inbounds for (i, indi) in enumerate(archive) + (i <= size(pop, 2)) || break + pop[:, i] .= BlackBoxOptim.params(indi) + end + if size(pop, 2) > length(archive) + @warn "Matrix columns count ($(size(pop, 2))) is bigger than population size ($(length(archive))), last columns not set" + end + return pop +end + +generate_embedder(factory::OptimizerFactory, id::Int, problem::OptimizationProblem) = + RandomBound(search_space(problem)) + +abstract type DiffEvoFactory{P <: OptimizationProblem} <: OptimizerFactory{P} end + +generate_selector( + factory::DiffEvoFactory, + id::Int, + problem::OptimizationProblem, + population, +) = RadiusLimitedSelector(get(factory.params, :selector_radius, popsize(population) ÷ 5)) + +function generate_modifier(factory::DiffEvoFactory, id::Int, problem::OptimizationProblem) + ops = GeneticOperator[ + MutationClock(UniformMutation(search_space(problem)), 1 / numdims(problem)), + BlackBoxOptim.AdaptiveDiffEvoRandBin1( + BlackBoxOptim.AdaptiveDiffEvoParameters( + factory.params[:fdistr], + factory.params[:crdistr], + ), + ), + SimplexCrossover{3}(1.05), + SimplexCrossover{2}(1.1), + #SimulatedBinaryCrossover(0.05, 16.0), + #SimulatedBinaryCrossover(0.05, 3.0), + #SimulatedBinaryCrossover(0.1, 5.0), + #SimulatedBinaryCrossover(0.2, 16.0), + UnimodalNormalDistributionCrossover{2}( + chain(BlackBoxOptim.UNDX_DefaultOptions, factory.params), + ), + UnimodalNormalDistributionCrossover{3}( + chain(BlackBoxOptim.UNDX_DefaultOptions, factory.params), + ), + ParentCentricCrossover{2}(chain(BlackBoxOptim.PCX_DefaultOptions, factory.params)), + ParentCentricCrossover{3}(chain(BlackBoxOptim.PCX_DefaultOptions, factory.params)), + ] + if problem isa SemModelBlackBoxOptimProblem + push!( + ops, + AdamMutation(problem.model, chain(AdamMutation_DefaultOptions, factory.params)), + ) + end + FAGeneticOperatorsMixture(ops) +end + +function generate_optimizer( + factory::DiffEvoFactory, + id::Int, + problem::OptimizationProblem, + popmatrix, +) + population = FitPopulation(popmatrix, nafitness(fitness_scheme(problem))) + BlackBoxOptim.DiffEvoOpt( + "AdaptiveDE/rand/1/bin/gradient", + population, + generate_selector(factory, id, problem, population), + generate_modifier(factory, id, problem), + generate_embedder(factory, id, problem), + ) +end + +const Population_DefaultParameters = ParamsDict( + :Population => BlackBoxOptim.PopulationMatrix(undef, 0, 0), + :PopulationSize => 100, +) + +const DE_DefaultParameters = chain( + ParamsDict( + :SelectorRadius => 0, + :fdistr => + BlackBoxOptim.BimodalCauchy(0.65, 0.1, 1.0, 0.1, clampBelow0 = false), + :crdistr => + BlackBoxOptim.BimodalCauchy(0.1, 0.1, 0.95, 0.1, clampBelow0 = false), + ), + Population_DefaultParameters, +) + +struct DefaultDiffEvoFactory{P <: OptimizationProblem} <: DiffEvoFactory{P} + problem::P + initial_population::BlackBoxOptim.PopulationMatrix + population_size::Int + params::ParamsDictChain +end + +DefaultDiffEvoFactory(problem::OptimizationProblem; kwargs...) = + DefaultDiffEvoFactory(problem, BlackBoxOptim.kwargs2dict(kwargs)) + +function DefaultDiffEvoFactory(problem::OptimizationProblem, params::AbstractDict) + params = chain(DE_DefaultParameters, params) + DefaultDiffEvoFactory{typeof(problem)}( + problem, + params[:Population], + params[:PopulationSize], + params, + ) +end + +function BlackBoxOptim.bbsetup(factory::OptimizerFactory; kwargs...) 
+ popmatrix = initial_population_matrix(factory, 1) + check_population(factory, popmatrix) + alg = generate_optimizer(factory, 1, problem(factory), popmatrix) + return generate_opt_controller(alg, factory, BlackBoxOptim.kwargs2dict(kwargs)) +end diff --git a/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl b/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl new file mode 100644 index 000000000..9cbdac4d0 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl @@ -0,0 +1,13 @@ +module SEMBlackBoxOptimExt + +using StructuralEquationModels, BlackBoxOptim, Optimisers + +const SEM = StructuralEquationModels + +export SemOptimizerBlackBoxOptim + +include("AdamMutation.jl") +include("DiffEvoFactory.jl") +include("SemOptimizerBlackBoxOptim.jl") + +end diff --git a/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl b/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl new file mode 100644 index 000000000..219f409e8 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl @@ -0,0 +1,91 @@ +############################################################################################ +### connect to BlackBoxOptim.jl as backend +############################################################################################ + +""" + SemOptimizerBlackBoxOptim(; lower_bound = -1000.0, lower_bounds = nothing, variance_lower_bound = 0.001, upper_bound = 1000.0, upper_bounds = nothing, kwargs...) + +Connects to the `BlackBoxOptim.jl` backend. The scalar bounds set the default box constraints of the search space, `lower_bounds`/`upper_bounds` allow per-parameter overrides (keyed by parameter name), and `variance_lower_bound` is the default lower bound for variance parameters. +""" +struct SemOptimizerBlackBoxOptim <: SemOptimizer{:BlackBoxOptim} + lower_bound::Float64 # default lower bound + variance_lower_bound::Float64 # default variance lower bound + lower_bounds::Union{Dict{Symbol, Float64}, Nothing} + + upper_bound::Float64 # default upper bound + upper_bounds::Union{Dict{Symbol, Float64}, Nothing} +end + +function SemOptimizerBlackBoxOptim(; + lower_bound::Float64 = -1000.0, + lower_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + variance_lower_bound::Float64 = 0.001, + upper_bound::Float64 = 1000.0, + upper_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + kwargs..., +) + if variance_lower_bound < 0.0 + throw(ArgumentError("variance_lower_bound must be non-negative")) + end + return SemOptimizerBlackBoxOptim( + lower_bound, + variance_lower_bound, + lower_bounds, + upper_bound, + upper_bounds, + ) +end + +SEM.SemOptimizer{:BlackBoxOptim}(args...; kwargs...) = + SemOptimizerBlackBoxOptim(args...; kwargs...)
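A hedged usage sketch for this extension (the model construction is elided and the bound values are illustrative; `sem_fit` refers to the method defined later in this file):

```julia
using StructuralEquationModels, BlackBoxOptim, Optimisers

optimizer = SemOptimizerBlackBoxOptim(
    lower_bound = -100.0,        # default box constraint for non-variance parameters
    variance_lower_bound = 0.01, # keeps variance parameters positive
    upper_bound = 100.0,
)
# `model` is assumed to be an AbstractSem constructed with this optimizer
fit = sem_fit(optimizer, model, start_simple(model); MaxSteps = 20_000)
```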
+ +# FIXME: SemOptimizerBlackBoxOptim has no `algorithm`/`options` fields, so these accessors error when called +SEM.algorithm(optimizer::SemOptimizerBlackBoxOptim) = optimizer.algorithm +SEM.options(optimizer::SemOptimizerBlackBoxOptim) = optimizer.options + +struct SemModelBlackBoxOptimProblem{M <: AbstractSem} <: + OptimizationProblem{ScalarFitnessScheme{true}} + model::M + fitness_scheme::ScalarFitnessScheme{true} + search_space::ContinuousRectSearchSpace +end + +function BlackBoxOptim.search_space(model::AbstractSem) + optim = model.optimizer::SemOptimizerBlackBoxOptim + return ContinuousRectSearchSpace( + SEM.lower_bounds( + optim.lower_bounds, + model, + default = optim.lower_bound, + variance_default = optim.variance_lower_bound, + ), + SEM.upper_bounds(optim.upper_bounds, model, default = optim.upper_bound), + ) +end + +function SemModelBlackBoxOptimProblem( + model::AbstractSem, + optimizer::SemOptimizerBlackBoxOptim, +) + SemModelBlackBoxOptimProblem(model, ScalarFitnessScheme{true}(), search_space(model)) +end + +BlackBoxOptim.fitness(params::AbstractVector, wrapper::SemModelBlackBoxOptimProblem) = + return SEM.evaluate!(0.0, nothing, nothing, wrapper.model, params) + +# sem_fit method +function SEM.sem_fit( + optimizer::SemOptimizerBlackBoxOptim, + model::AbstractSem, + start_params::AbstractVector; + Method::Symbol = :adaptive_de_rand_1_bin_with_gradient, + MaxSteps::Integer = 50000, + kwargs..., +) + problem = SemModelBlackBoxOptimProblem(model, optimizer) + if Method == :adaptive_de_rand_1_bin_with_gradient + # custom adaptive differential evolution with mutation that moves along the gradient + bbopt_factory = DefaultDiffEvoFactory(problem; kwargs...) + bbopt = bbsetup(bbopt_factory; MaxSteps, kwargs...) + else + bbopt = bbsetup(problem; Method, MaxSteps, kwargs...) + end + res = bboptimize(bbopt) + return SemFit(best_fitness(res), best_candidate(res), nothing, model, res) +end diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl index 959380292..87662ce8d 100644 --- a/ext/SEMNLOptExt/NLopt.jl +++ b/ext/SEMNLOptExt/NLopt.jl @@ -206,8 +206,8 @@ end ############################################################################################ function Base.show(io::IO, result::NLoptResult) - print(io, "Optimizer status: $(result.result[3]) \n") - print(io, "Minimum: $(round(result.result[1]; digits = 2)) \n") - print(io, "Algorithm: $(result.problem.algorithm) \n") - print(io, "No. evaluations: $(result.problem.numevals) \n") + print(io, "Optimizer status: $(result.result[3]) \n") + @printf(io, "Minimum: %.4g\n", result.result[1]) + print(io, "Algorithm: $(result.problem.algorithm) \n") + print(io, "No. evaluations: $(result.problem.numevals) \n") end diff --git a/ext/SEMNLOptExt/SEMNLOptExt.jl b/ext/SEMNLOptExt/SEMNLOptExt.jl index a159f6dc8..c0fe0db49 100644 --- a/ext/SEMNLOptExt/SEMNLOptExt.jl +++ b/ext/SEMNLOptExt/SEMNLOptExt.jl @@ -1,6 +1,6 @@ module SEMNLOptExt -using StructuralEquationModels, NLopt +using StructuralEquationModels, NLopt, Printf SEM = StructuralEquationModels diff --git a/ext/SEMProximalOptExt/ProximalAlgorithms.jl b/ext/SEMProximalOptExt/ProximalAlgorithms.jl index eceff0dc3..c80c409a8 100644 --- a/ext/SEMProximalOptExt/ProximalAlgorithms.jl +++ b/ext/SEMProximalOptExt/ProximalAlgorithms.jl @@ -10,7 +10,7 @@ Connects to `ProximalAlgorithms.jl` as the optimization backend. algorithm = ProximalAlgorithms.PANOC(), operator_g, operator_h = nothing, - kwargs..., + kwargs...) # Arguments - `algorithm`: optimization algorithm.
@@ -109,9 +109,9 @@ end ############################################################################################ function Base.show(io::IO, result::ProximalResult) - print(io, "Minimum: $(round(result.result[:minimum]; digits = 2)) \n") - print(io, "No. evaluations: $(result.result[:iterations]) \n") - print(io, "Operator: $(nameof(typeof(result.result[:operator_g]))) \n") + @printf(io, "Minimum: %.4g\n", result.result[:minimum]) + print(io, "No. evaluations: $(result.result[:iterations]) \n") + print(io, "Operator: $(nameof(typeof(result.result[:operator_g]))) \n") if haskey(result.result, :operator_h) print(io, "Second Operator: $(nameof(typeof(result.result[:operator_h]))) \n") end diff --git a/ext/SEMProximalOptExt/SEMProximalOptExt.jl b/ext/SEMProximalOptExt/SEMProximalOptExt.jl index 156311367..3359ad992 100644 --- a/ext/SEMProximalOptExt/SEMProximalOptExt.jl +++ b/ext/SEMProximalOptExt/SEMProximalOptExt.jl @@ -1,6 +1,6 @@ module SEMProximalOptExt -using StructuralEquationModels +using StructuralEquationModels, Printf using ProximalAlgorithms export SemOptimizerProximal diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 7c8923dc8..20e292d90 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -1,6 +1,7 @@ module StructuralEquationModels using LinearAlgebra, + Printf, Optim, NLSolversBase, Statistics, @@ -13,7 +14,8 @@ using LinearAlgebra, StenoGraphs, LazyArtifacts, DelimitedFiles, - DataFrames + DataFrames, + PackageExtensionCompat export StenoGraphs, @StenoGraph, meld @@ -25,6 +27,7 @@ include("objective_gradient_hessian.jl") # helper objects and functions include("additional_functions/commutation_matrix.jl") +include("additional_functions/sparse_utils.jl") include("additional_functions/params_array.jl") # fitted objects @@ -37,6 +40,8 @@ include("frontend/specification/RAMMatrices.jl") include("frontend/specification/EnsembleParameterTable.jl") include("frontend/specification/StenoGraphs.jl") include("frontend/fit/summary.jl") +include("frontend/finite_diff.jl") +include("frontend/predict.jl") # pretty printing include("frontend/pretty_printing.jl") # observed @@ -55,6 +60,7 @@ include("implied/RAM/symbolic.jl") include("implied/RAM/generic.jl") include("implied/empty.jl") # loss +include("loss/ML/abstract.jl") include("loss/ML/ML.jl") include("loss/ML/FIML.jl") include("loss/regularization/ridge.jl") @@ -66,6 +72,7 @@ include("optimizer/Empty.jl") include("optimizer/optim.jl") # helper functions include("additional_functions/helper.jl") +include("additional_functions/start_val/common.jl") include("additional_functions/start_val/start_fabin3.jl") include("additional_functions/start_val/start_simple.jl") include("additional_functions/artifacts.jl") @@ -103,8 +110,8 @@ export AbstractSem, start_val, start_fabin3, start_simple, + AbstractLoss, SemLoss, - SemLossFunction, SemML, SemFIML, em_mvn, @@ -112,6 +119,9 @@ export AbstractSem, SemConstant, SemWLS, loss, + nsem_terms, + sem_terms, + sem_term, SemOptimizer, SemOptimizerEmpty, SemOptimizerOptim, @@ -184,4 +194,9 @@ export AbstractSem, ←, ↔, ⇔ + +function __init__() + @require_extensions +end + end diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index 5559034e0..072bd7b79 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -53,16 +53,10 @@ function remove_all_missing(data::AbstractMatrix) return data[keep, :], keep end -function batch_inv!(fun, model) - for i in 1:size(fun.inverses, 1) 
- fun.inverses[i] .= LinearAlgebra.inv!(fun.choleskys[i]) - end -end - #= function batch_sym_inv_update!(fun::Union{LossFunction, DiffFunction}, model) M_inv = inv(fun.choleskys[1]) - for i = 1:size(fun.inverses, 1) + for i in 1:size(fun.inverses, 1) if size(model.observed.patterns_not[i]) == 0 fun.inverses[i] .= M_inv else @@ -130,6 +124,39 @@ function elimination_matrix(n::Integer) return L end +# vector of lower-triangular values of a square matrix +function vech(A::AbstractMatrix{T}) where {T} + size(A, 1) == size(A, 2) || + throw(ArgumentError("Matrix must be square, $(size(A)) given")) + n = size(A, 1) + v = Vector{T}(undef, (n * (n + 1)) >> 1) + k = 0 + for (j, Aj) in enumerate(eachcol(A)), i in j:n + @inbounds v[k+=1] = Aj[i] + end + @assert k == length(v) + return v +end + +# vector of lower-triangular linear indices of an n×n square matrix +function vechinds(n::Integer) + A_lininds = LinearIndices((n, n)) + v = Vector{Int}(undef, (n * (n + 1)) >> 1) + k = 0 + for j in 1:n, i in j:n + @inbounds v[k+=1] = A_lininds[i, j] + end + @assert k == length(v) + return v +end + +# vector of lower-triangular linear indices of a square matrix +function vechinds(A::AbstractMatrix) + size(A, 1) == size(A, 2) || + throw(ArgumentError("Matrix must be square, $(size(A)) given")) + return vechinds(size(A, 1)) +end + # returns the vector of non-unique values in the order of appearance # each non-unique value is reported once function nonunique(values::AbstractVector) diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl index 89fb6d151..90f58ac57 100644 --- a/src/additional_functions/simulation.jl +++ b/src/additional_functions/simulation.jl @@ -33,102 +33,84 @@ function update_observed end # change observed (data) without reconstructing the whole model ############################################################################################ +# don't change non-SEM terms +replace_observed(loss::AbstractLoss; kwargs...) = loss + # use the same observed type as before -replace_observed(model::AbstractSemSingle; kwargs...) = - replace_observed(model, typeof(observed(model)).name.wrapper; kwargs...) +replace_observed(loss::SemLoss; kwargs...) = + replace_observed(loss, typeof(SEM.observed(loss)).name.wrapper; kwargs...) # construct a new observed type -replace_observed(model::AbstractSemSingle, observed_type; kwargs...) = - replace_observed(model, observed_type(; kwargs...); kwargs...) - -replace_observed(model::AbstractSemSingle, new_observed::SemObserved; kwargs...) = - replace_observed( - model, - observed(model), - implied(model), - loss(model), - new_observed; - kwargs..., - ) - -function replace_observed( - model::AbstractSemSingle, - old_observed, - implied, - loss, - new_observed::SemObserved; - kwargs..., -) +replace_observed(loss::SemLoss, observed_type; kwargs...) = + replace_observed(loss, observed_type(; kwargs...); kwargs...) + +function replace_observed(loss::SemLoss, new_observed::SemObserved; kwargs...) kwargs = Dict{Symbol, Any}(kwargs...) + old_observed = SEM.observed(loss) + implied = SEM.implied(loss) # get field types kwargs[:observed_type] = typeof(new_observed) kwargs[:old_observed_type] = typeof(old_observed) - kwargs[:implied_type] = typeof(implied) - kwargs[:loss_types] = [typeof(lossfun) for lossfun in loss.functions] # update implied implied = update_observed(implied, new_observed; kwargs...)
kwargs[:implied] = implied + kwargs[:implied_type] = typeof(implied) kwargs[:nparams] = nparams(implied) # update loss - loss = update_observed(loss, new_observed; kwargs...) - kwargs[:loss] = loss - - #new_implied = update_observed(model.implied, new_observed; kwargs...) - - return Sem( - new_observed, - update_observed(model.implied, new_observed; kwargs...), - update_observed(model.loss, new_observed; kwargs...), - ) + return update_observed(loss, new_observed; kwargs...) end -function update_observed(loss::SemLoss, new_observed; kwargs...) - new_functions = Tuple( - update_observed(lossfun, new_observed; kwargs...) for lossfun in loss.functions - ) - return SemLoss(new_functions, loss.weights) +replace_observed(loss::LossTerm; kwargs...) = + LossTerm(replace_observed(loss.loss; kwargs...), loss.id, loss.weight) + +function replace_observed(sem::Sem; kwargs...) + updated_terms = Tuple(replace_observed(term; kwargs...) for term in loss_terms(sem)) + return Sem(updated_terms...) end ############################################################################################ # simulate data ############################################################################################ """ - (1) rand(model::AbstractSemSingle, params, n) + rand(sem::Union{Sem, SemLoss, SemImplied}, params, n) + rand(sem::Union{Sem, SemLoss, SemImplied}, n) - (2) rand(model::AbstractSemSingle, n) - -Sample normally distributed data from the model-implied covariance matrix and mean vector. +Sample from the multivariate normal distribution implied by the SEM model. # Arguments -- `model::AbstractSemSingle`: model to simulate from. -- `params`: parameter values to simulate from. -- `n::Integer`: Number of samples. +- `sem`: SEM model to use. Ensemble models with multiple SEM terms are not supported. +- `params`: SEM model parameters to simulate from. +- `n::Integer`: Number of samples to draw. 
# Examples ```julia rand(model, start_simple(model), 100) ``` """ -function Distributions.rand( - model::AbstractSemSingle{O, I, L}, - params, - n::Integer, -) where {O, I <: Union{RAM, RAMSymbolic}, L} - update!(EvaluationTargets{true, false, false}(), model.implied, model, params) - return rand(model, n) +function Distributions.rand(implied::SemImplied, params, n::Integer) + update!(EvaluationTargets{true, false, false}(), implied, params) + return rand(implied, n) end -function Distributions.rand( - model::AbstractSemSingle{O, I, L}, - n::Integer, -) where {O, I <: Union{RAM, RAMSymbolic}, L} - if MeanStruct(model.implied) === NoMeanStruct - data = permutedims(rand(MvNormal(Symmetric(model.implied.Σ)), n)) - elseif MeanStruct(model.implied) === HasMeanStruct - data = permutedims(rand(MvNormal(model.implied.μ, Symmetric(model.implied.Σ)), n)) +Distributions.rand(implied::SemImplied, n::Integer) = + error("rand($(typeof(implied)), n) is not implemented") + +function Distributions.rand(implied::Union{RAM, RAMSymbolic}, n::Integer) + Σ = Symmetric(implied.Σ) + if MeanStruct(implied) === NoMeanStruct + return permutedims(rand(MvNormal(Σ), n)) + elseif MeanStruct(implied) === HasMeanStruct + return permutedims(rand(MvNormal(implied.μ, Σ), n)) end - return data end + +Distributions.rand(loss::SemLoss, params, n::Integer) = rand(SEM.implied(loss), params, n) + +Distributions.rand(loss::SemLoss, n::Integer) = rand(SEM.implied(loss), n) + +Distributions.rand(model::Sem, params, n::Integer) = rand(sem_term(model), params, n) + +Distributions.rand(model::Sem, n::Integer) = rand(sem_term(model), n) diff --git a/src/additional_functions/sparse_utils.jl b/src/additional_functions/sparse_utils.jl new file mode 100644 index 000000000..76381c473 --- /dev/null +++ b/src/additional_functions/sparse_utils.jl @@ -0,0 +1,83 @@ +# generate sparse matrix with 1 in each row +function eachrow_to_col( + ::Type{T}, + column_indices::AbstractVector{Int}, + ncolumns::Integer, +) where {T} + nrows = length(column_indices) + (nrows > ncolumns) && throw( + DimensionMismatch( + "The number of rows ($nrows) cannot exceed the number of columns ($ncolumns)", + ), + ) + all(i -> 1 <= i <= ncolumns, column_indices) || + throw(ArgumentError("All column indices must be between 1 and $ncolumns")) + + sparse(eachindex(column_indices), column_indices, ones(T, nrows), nrows, ncolumns) +end + +eachrow_to_col(column_indices::AbstractVector{Int}, ncolumns::Integer) = + eachrow_to_col(Float64, column_indices, ncolumns) + +# generate sparse matrix with 1 in each column +function eachcol_to_row( + ::Type{T}, + row_indices::AbstractVector{Int}, + nrows::Integer, +) where {T} + ncols = length(row_indices) + (ncols > nrows) && throw( + DimensionMismatch( + "The number of columns ($ncols) cannot exceed the number of rows ($nrows)", + ), + ) + all(i -> 1 <= i <= nrows, row_indices) || + throw(ArgumentError("All row indices must be between 1 and $nrows")) + + sparse(row_indices, eachindex(row_indices), ones(T, ncols), nrows, ncols) +end + +eachcol_to_row(row_indices::AbstractVector{Int}, nrows::Integer) = + eachcol_to_row(Float64, row_indices, nrows) + +# non-zero indices of columns in matrix A generated by eachrow_to_col() +# if order == :rows, then the indices are in the order of the rows, +# if order == :columns, the indices are in the order of the columns +function nzcols_eachrow_to_col(F, A::SparseMatrixCSC; order::Symbol = :rows) + order ∈ [:rows, :columns] || throw(ArgumentError("order must be :rows or :columns")) + T = typeof(F(1)) + 
res = Vector{T}(undef, size(A, 1)) + n = 0 + for i in 1:size(A, 2) + colptr = A.colptr[i] + next_colptr = A.colptr[i+1] + if next_colptr > colptr # non-zero + @assert next_colptr - colptr == 1 + n += 1 + res[order == :rows ? A.rowval[colptr] : n] = F(i) + end + end + @assert n == size(A, 1) + return res +end + +nzcols_eachrow_to_col(A::SparseMatrixCSC; order::Symbol = :rows) = + nzcols_eachrow_to_col(identity, A, order = order) + +# same as nzcols_eachrow_to_col() +# but without the assumption that each row corresponds to exactly one column +# the order is always column order +function nzcols(F, A::SparseMatrixCSC) + T = typeof(F(1)) + res = Vector{T}() + for i in 1:size(A, 2) + colptr = A.colptr[i] + next_colptr = A.colptr[i+1] + if next_colptr > colptr # non-zero + push!(res, F(i)) + end + end + return res +end + +nzcols(A::SparseMatrixCSC) = nzcols(identity, A) diff --git a/src/additional_functions/start_val/common.jl b/src/additional_functions/start_val/common.jl new file mode 100644 index 000000000..92c85d6f5 --- /dev/null +++ b/src/additional_functions/start_val/common.jl @@ -0,0 +1,17 @@ + +# start values for SEM Models (including ensembles) +function start_values(f, model::AbstractSem; kwargs...) + start_vals = fill(0.0, nparams(model)) + + # initialize parameters using the SEM loss terms + # (the last SEM loss term that sets a given parameter to a nonzero value wins) + for term in loss_terms(model) + issemloss(term) || continue + term_start_vals = f(loss(term); kwargs...) + for (i, val) in enumerate(term_start_vals) + iszero(val) || (start_vals[i] = val) + end + end + + return start_vals +end diff --git a/src/additional_functions/start_val/start_fabin3.jl b/src/additional_functions/start_val/start_fabin3.jl index bd55f21d7..92d151957 100644 --- a/src/additional_functions/start_val/start_fabin3.jl +++ b/src/additional_functions/start_val/start_fabin3.jl @@ -7,21 +7,17 @@ Not available for ensemble models. function start_fabin3 end # splice model and loss functions -function start_fabin3(model::AbstractSemSingle; kwargs...) - return start_fabin3(model.observed, model.implied, model.loss.functions..., kwargs...) +function start_fabin3(model::SemLoss; kwargs...) + return start_fabin3(model.observed, model.implied; kwargs...) end -function start_fabin3(observed, implied, args...; kwargs...) - return start_fabin3(implied.ram_matrices, obs_cov(observed), obs_mean(observed)) -end - -# SemObservedMissing -function start_fabin3(observed::SemObservedMissing, implied, args...; kwargs...) - if !observed.em_model.fitted - em_mvn(observed; kwargs...) - end - - return start_fabin3(implied.ram_matrices, observed.em_model.Σ, observed.em_model.μ) +function start_fabin3(observed::SemObserved, implied::SemImplied; kwargs...) + return start_fabin3( + implied.ram_matrices, + obs_cov(observed), + # ignore observed means if there is no meanstructure + !isnothing(implied.ram_matrices.M) ? obs_mean(observed) : nothing, + ) end function start_fabin3( @@ -189,3 +185,6 @@ end function is_in_Λ(ind_vec, F_ind) return any(ind -> !(ind[2] ∈ F_ind) && (ind[1] ∈ F_ind), ind_vec) end + +# ensembles +start_fabin3(model::AbstractSem; kwargs...) = start_values(start_fabin3, model; kwargs...) diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index ad5148e3f..1ca123864 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -16,33 +16,13 @@ Return a vector of simple starting values.
function start_simple end # Single Models ---------------------------------------------------------------------------- -function start_simple(model::AbstractSemSingle; kwargs...) - return start_simple(model.observed, model.implied, model.loss.functions...; kwargs...) -end - -function start_simple(observed, implied, args...; kwargs...) - return start_simple(implied.ram_matrices; kwargs...) -end - -# Ensemble Models -------------------------------------------------------------------------- -function start_simple(model::SemEnsemble; kwargs...) - start_vals = [] - - for sem in model.sems - push!(start_vals, start_simple(sem; kwargs...)) - end - - has_start_val = [.!iszero.(start_val) for start_val in start_vals] +start_simple(model::SemLoss; kwargs...) = + start_simple(observed(model), implied(model); kwargs...) - start_val = similar(start_vals[1]) - start_val .= 0.0 +start_simple(observed::SemObserved, implied::SemImplied; kwargs...) = + start_simple(implied.ram_matrices; kwargs...) - for (j, indices) in enumerate(has_start_val) - start_val[indices] .= start_vals[j][indices] - end - - return start_val -end +start_simple(model::AbstractSem; kwargs...) = start_values(start_simple, model; kwargs...) function start_simple( ram_matrices::RAMMatrices; @@ -53,54 +33,61 @@ function start_simple( start_covariances_observed = 0.0, start_covariances_latent = 0.0, start_covariances_obs_lat = 0.0, - start_means = 0.0, + start_mean_latent = 0.0, + start_mean_observed = 0.0, kwargs..., ) - A, S, F_ind, M, n_par = ram_matrices.A, - ram_matrices.S, - observed_var_indices(ram_matrices), - ram_matrices.M, - nparams(ram_matrices) + A, S, M = ram_matrices.A, ram_matrices.S, ram_matrices.M + obs_inds = Set(observed_var_indices(ram_matrices)) + C_indices = CartesianIndices(size(A)) - start_val = zeros(n_par) - n_obs = nobserved_vars(ram_matrices) - n_var = nvars(ram_matrices) + start_vals = Vector{Float64}(undef, nparams(ram_matrices)) + for i in eachindex(start_vals) + par = 0.0 - C_indices = CartesianIndices((n_var, n_var)) - - for i in 1:n_par Si_ind = param_occurences(S, i) - Ai_ind = param_occurences(A, i) if length(Si_ind) != 0 # use the first occurrence of the parameter to determine starting value c_ind = C_indices[Si_ind[1]] if c_ind[1] == c_ind[2] - if c_ind[1] ∈ F_ind - start_val[i] = start_variances_observed - else - start_val[i] = start_variances_latent - end + par = ifelse( + c_ind[1] ∈ obs_inds, + start_variances_observed, + start_variances_latent, + ) else - o1 = c_ind[1] ∈ F_ind - o2 = c_ind[2] ∈ F_ind - if o1 & o2 - start_val[i] = start_covariances_observed - elseif !o1 & !o2 - start_val[i] = start_covariances_latent - else - start_val[i] = start_covariances_obs_lat - end + o1 = c_ind[1] ∈ obs_inds + o2 = c_ind[2] ∈ obs_inds + par = ifelse( + o1 && o2, + start_covariances_observed, + ifelse(!o1 && !o2, start_covariances_latent, start_covariances_obs_lat), + ) end - elseif length(Ai_ind) != 0 + end + + Ai_ind = param_occurences(A, i) + if length(Ai_ind) != 0 + iszero(par) || + @warn "param[$i]=$(params(ram_matrices, i)) is already set to $par" c_ind = C_indices[Ai_ind[1]] - if (c_ind[1] ∈ F_ind) & !(c_ind[2] ∈ F_ind) - start_val[i] = start_loadings - else - start_val[i] = start_regressions + par = ifelse( + (c_ind[1] ∈ obs_inds) && !(c_ind[2] ∈ obs_inds), + start_loadings, + start_regressions, + ) + end + + if !isnothing(M) + Mi_inds = param_occurences(M, i) + if length(Mi_inds) != 0 + iszero(par) || + @warn "param[$i]=$(params(ram_matrices, i)) is already set to $par" + par = ifelse(Mi_inds[1] ∈
obs_inds, start_mean_observed, start_mean_latent) end - elseif !isnothing(M) && (length(param_occurences(M, i)) != 0) - start_val[i] = start_means end + + start_vals[i] = par end - return start_val + return start_vals end diff --git a/src/frontend/finite_diff.jl b/src/frontend/finite_diff.jl new file mode 100644 index 000000000..ee0a9bf96 --- /dev/null +++ b/src/frontend/finite_diff.jl @@ -0,0 +1,35 @@ +_unwrap(wrapper::SemFiniteDiff) = wrapper.model +params(wrapper::SemFiniteDiff) = params(wrapper.model) +loss_terms(wrapper::SemFiniteDiff) = loss_terms(wrapper.model) + +const FiniteDiffLossWrappers = Union{LossFiniteDiff, SemLossFiniteDiff} + +_unwrap(term::AbstractLoss) = term +_unwrap(wrapper::FiniteDiffLossWrappers) = wrapper.loss +implied(wrapper::FiniteDiffLossWrappers) = implied(_unwrap(wrapper)) +observed(wrapper::FiniteDiffLossWrappers) = observed(_unwrap(wrapper)) + +FiniteDiffWrapper(model::AbstractSem) = SemFiniteDiff(model) +FiniteDiffWrapper(loss::AbstractLoss) = LossFiniteDiff(loss) +FiniteDiffWrapper(loss::SemLoss) = SemLossFiniteDiff(loss) + +function evaluate!( + objective, + gradient, + hessian, + sem::Union{SemFiniteDiff, FiniteDiffLossWrappers}, + params, +) + wrapped = _unwrap(sem) + obj(p) = _evaluate!( + objective_zero(objective, gradient, hessian), + nothing, + nothing, + wrapped, + p, + ) + isnothing(gradient) || FiniteDiff.finite_difference_gradient!(gradient, obj, params) + isnothing(hessian) || FiniteDiff.finite_difference_hessian!(hessian, obj, params) + # FIXME if objective is not calculated, SemLoss implied states may not correspond to params + return !isnothing(objective) ? obj(params) : nothing +end diff --git a/src/frontend/fit/SemFit.jl b/src/frontend/fit/SemFit.jl index 84d2f502c..8dcff86cb 100644 --- a/src/frontend/fit/SemFit.jl +++ b/src/frontend/fit/SemFit.jl @@ -36,7 +36,7 @@ function Base.show(io::IO, semfit::SemFit) print(io, "\n") print(io, semfit.model) print(io, "\n") - #print(io, "Objective value: $(round(semfit.minimum, digits = 4)) \n") + #@printf(io, "Objective value: %.4g\n", semfit.minimum) print(io, "------------- Optimization result ------------- \n") print(io, "\n") print(io, semfit.optimization_result) diff --git a/src/frontend/fit/fitmeasures/RMSEA.jl b/src/frontend/fit/fitmeasures/RMSEA.jl index b91e81d3e..2c3a0e69e 100644 --- a/src/frontend/fit/fitmeasures/RMSEA.jl +++ b/src/frontend/fit/fitmeasures/RMSEA.jl @@ -1,18 +1,12 @@ """ - RMSEA(sem_fit::SemFit) + RMSEA(fit::SemFit) Return the RMSEA. """ -function RMSEA end +RMSEA(fit::SemFit) = RMSEA(fit, fit.model) -RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = - RMSEA(df(sem_fit), χ²(sem_fit), nsamples(sem_fit)) +RMSEA(fit::SemFit, model::AbstractSem) = + sqrt(nsem_terms(model)) * RMSEA(df(fit), χ²(fit), nsamples(fit)) -RMSEA(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - sqrt(length(sem_fit.model.sems)) * RMSEA(df(sem_fit), χ²(sem_fit), nsamples(sem_fit)) - -function RMSEA(df, chi2, nsamples) - rmsea = (chi2 - df) / (nsamples * df) - rmsea > 0 ? nothing : rmsea = 0 - return sqrt(rmsea) -end +RMSEA(df::Number, chi2::Number, nsamples::Number) = + sqrt(max((chi2 - df) / (nsamples * df), 0.0)) diff --git a/src/frontend/fit/fitmeasures/chi2.jl b/src/frontend/fit/fitmeasures/chi2.jl index 333783f95..43b327f4a 100644 --- a/src/frontend/fit/fitmeasures/chi2.jl +++ b/src/frontend/fit/fitmeasures/chi2.jl @@ -1,89 +1,45 @@ """ - χ²(sem_fit::SemFit) + χ²(fit::SemFit) Return the χ² value.
""" -function χ² end +χ²(fit::SemFit) = χ²(fit, fit.model) -############################################################################################ -# Single Models -############################################################################################ +function χ²(fit::SemFit, model::AbstractSem) + terms = sem_terms(model) + isempty(terms) && return 0.0 -# SemFit splices loss functions ------------------------------------------------------------ -χ²(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}) = χ²( - sem_fit, - sem_fit.model.observed, - sem_fit.model.implied, - sem_fit.model.loss.functions..., -) + term1 = _unwrap(loss(terms[1])) + L = typeof(term1).name -# RAM + SemML -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = - (nsamples(sem_fit) - 1) * - (sem_fit.minimum - logdet(observed.obs_cov) - nobserved_vars(observed)) - -# bollen, p. 115, only correct for GLS weight matrix -χ²(sem_fit::SemFit, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = - (nsamples(sem_fit) - 1) * sem_fit.minimum - -# FIML -function χ²(sem_fit::SemFit, observed::SemObservedMissing, imp, loss_ml::SemFIML) - ll_H0 = minus2ll(sem_fit) - ll_H1 = minus2ll(observed) - chi2 = ll_H0 - ll_H1 - return chi2 -end - -############################################################################################ -# Collections -############################################################################################ - -# SemFit splices loss functions ------------------------------------------------------------ -χ²(sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}) = - χ²(sem_fit, sem_fit.model, sem_fit.model.sems[1].loss.functions[1]) - -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemWLS} - check_ensemble_length(model) - check_lossfun_types(model, L) - return (nsamples(model) - 1) * sem_fit.minimum -end + # check that all SemLoss terms are of the same class (ML, FIML, WLS etc), ignore typeparams + for (i, term) in enumerate(terms) + lossterm = _unwrap(loss(term)) + @assert lossterm isa SemLoss + if typeof(_unwrap(lossterm)).name != L + @error "SemLoss term #$i is $(typeof(_unwrap(lossterm)).name), expected $L. Heterogeneous loss functions are not supported" + end + end -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemML} - check_ensemble_length(model) - check_lossfun_types(model, L) - F_G = sem_fit.minimum - F_G -= sum([ - w * (logdet(m.observed.obs_cov) + nobserved_vars(m.observed)) for - (w, m) in zip(model.weights, model.sems) - ]) - return (nsamples(model) - 1) * F_G + return χ²(typeof(term1), fit, model) end -function χ²(sem_fit::SemFit, model::SemEnsemble, lossfun::L) where {L <: SemFIML} - check_ensemble_length(model) - check_lossfun_types(model, L) +χ²(::Type{<:SemWLS}, fit::SemFit, model::AbstractSem) = (nsamples(model) - 1) * fit.minimum - ll_H0 = minus2ll(sem_fit) - ll_H1 = sum(minus2ll.(observed.(models(model)))) - chi2 = ll_H0 - ll_H1 - - return chi2 -end - -function check_ensemble_length(model) - for sem in model.sems - if length(sem.loss.functions) > 1 - @error "A model for one of the groups contains multiple loss functions." 
+function χ²(::Type{<:SemML}, fit::SemFit, model::AbstractSem) + G = sum(loss_terms(model)) do term + if issemloss(term) + data = observed(term) + something(weight(term), 1.0) * (logdet(obs_cov(data)) + nobserved_vars(data)) + else + return 0.0 end end + return (nsamples(model) - 1) * (fit.minimum - G) end -function check_lossfun_types(model, type) - for sem in model.sems - for lossfun in sem.loss.functions - if !isa(lossfun, type) - @error "Your model(s) contain multiple lossfunctions with differing types." - end - end - end +function χ²(::Type{<:SemFIML}, fit::SemFit, model::AbstractSem) + ll_H0 = minus2ll(fit) + ll_H1 = sum(minus2ll ∘ observed, sem_terms(model)) + return ll_H0 - ll_H1 end diff --git a/src/frontend/fit/fitmeasures/df.jl b/src/frontend/fit/fitmeasures/df.jl index 4d9025601..c2ba3a95b 100644 --- a/src/frontend/fit/fitmeasures/df.jl +++ b/src/frontend/fit/fitmeasures/df.jl @@ -10,13 +10,16 @@ df(sem_fit::SemFit) = df(sem_fit.model) df(model::AbstractSem) = n_dp(model) - nparams(model) -function n_dp(model::AbstractSemSingle) - nvars = nobserved_vars(model) +# length of Σ and μ (if present) +function n_dp(implied::SemImplied) + nvars = nobserved_vars(implied) ndp = 0.5(nvars^2 + nvars) - if !isnothing(model.implied.μ) + if !isnothing(implied.μ) ndp += nvars end return ndp end -n_dp(model::SemEnsemble) = sum(n_dp.(model.sems)) +n_dp(term::SemLoss) = n_dp(implied(term)) + +n_dp(model::AbstractSem) = sum(n_dp ∘ loss, sem_terms(model)) diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index 2cb87d79c..f3c8d58ef 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -1,5 +1,5 @@ """ - minus2ll(sem_fit::SemFit) + minus2ll(fit::SemFit) Return the negative 2* log likelihood. """ @@ -9,52 +9,47 @@ function minus2ll end # Single Models ############################################################################################ -# SemFit splices loss functions ------------------------------------------------------------ -minus2ll( - sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: AbstractSemSingle, O}, -) = minus2ll( - sem_fit, - sem_fit.model.observed, - sem_fit.model.implied, - sem_fit.model.loss.functions..., -) +minus2ll(fit::SemFit) = minus2ll(fit.model, fit) -minus2ll(sem_fit::SemFit, obs, imp, args...) = minus2ll(sem_fit.minimum, obs, imp, args...) 
+function minus2ll(term::SemLoss, fit::SemFit) + minimum = objective(term, fit.solution) + return minus2ll(term, minimum) +end -# SemML ------------------------------------------------------------------------------------ -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemML) = - nsamples(obs) * (minimum + log(2π) * nobserved_vars(obs)) +minus2ll(term::SemML, minimum::Number) = + nsamples(term) * (minimum + log(2π) * nobserved_vars(term)) # WLS -------------------------------------------------------------------------------------- -minus2ll(minimum::Number, obs, imp::Union{RAM, RAMSymbolic}, loss_ml::SemWLS) = missing +minus2ll(term::SemWLS, minimum::Number) = missing # compute likelihood for missing data - H0 ------------------------------------------------- # -2ll = (∑ log(2π)*(nᵢ + mᵢ)) + F*n -function minus2ll(minimum::Number, observed, imp::Union{RAM, RAMSymbolic}, loss_ml::SemFIML) - F = minimum * nsamples(observed) - F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns) +function minus2ll(term::SemFIML, minimum::Number) + obs = observed(term)::SemObservedMissing + F = minimum * nsamples(obs) + F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), obs.patterns) return F end # compute likelihood for missing data - H1 ------------------------------------------------- # -2ll = ∑ log(2π)*(nᵢ + mᵢ) + ln(Σᵢ) + (mᵢ - μᵢ)ᵀ Σᵢ⁻¹ (mᵢ - μᵢ)) + tr(SᵢΣᵢ) function minus2ll(observed::SemObservedMissing) - # fit EM-based mean and cov if not yet fitted - # FIXME EM could be very computationally expensive - observed.em_model.fitted || em_mvn(observed) - - Σ = observed.em_model.Σ - μ = observed.em_model.μ + Σ, μ = obs_cov(observed), obs_mean(observed) + # FIXME: this code is duplicate to objective(fiml, ...) F = sum(observed.patterns) do pat # implied covariance/mean - Σᵢ = Σ[pat.measured_mask, pat.measured_mask] + Σᵢ = Symmetric(Σ[pat.measured_mask, pat.measured_mask]) Σᵢ_chol = cholesky!(Σᵢ) ld = logdet(Σᵢ_chol) Σᵢ⁻¹ = LinearAlgebra.inv!(Σᵢ_chol) - meandiffᵢ = pat.measured_mean - μ[pat.measured_mask] + μ_diffᵢ = pat.measured_mean - μ[pat.measured_mask] - F_one_pattern(meandiffᵢ, Σᵢ⁻¹, pat.measured_cov, ld, nsamples(pat)) + F_pat = ld + dot(μ_diffᵢ, Σᵢ⁻¹, μ_diffᵢ) + if nsamples(pat) > 1 + F_pat += dot(pat.measured_cov, Σᵢ⁻¹) + end + F_pat * nsamples(pat) end F += log(2π) * sum(pat -> nsamples(pat) * nmeasured_vars(pat), observed.patterns) @@ -62,20 +57,5 @@ function minus2ll(observed::SemObservedMissing) return F end -############################################################################################ -# Collection -############################################################################################ - -minus2ll(minimum, model::AbstractSemSingle) = - minus2ll(minimum, model.observed, model.implied, model.loss.functions...) - -function minus2ll( - sem_fit::SemFit{Mi, So, St, Mo, O} where {Mi, So, St, Mo <: SemEnsemble, O}, -) - m2ll = 0.0 - for sem in sem_fit.model.sems - minimum = objective!(sem, sem_fit.solution) - m2ll += minus2ll(minimum, sem) - end - return m2ll -end +minus2ll(model::AbstractSem, fit::SemFit) = + sum(Base.Fix2(minus2ll, fit) ∘ _unwrap ∘ loss, sem_terms(model)) diff --git a/src/frontend/fit/fitmeasures/p.jl b/src/frontend/fit/fitmeasures/p.jl index 3d4275f95..8166309cc 100644 --- a/src/frontend/fit/fitmeasures/p.jl +++ b/src/frontend/fit/fitmeasures/p.jl @@ -3,4 +3,4 @@ Return the p value computed from the χ² test statistic. 
""" -p_value(sem_fit::SemFit) = 1 - cdf(Chisq(df(sem_fit)), χ²(sem_fit)) +p_value(sem_fit::SemFit) = ccdf(Chisq(df(sem_fit)), χ²(sem_fit)) diff --git a/src/frontend/fit/standard_errors/hessian.jl b/src/frontend/fit/standard_errors/hessian.jl index 6ae53407f..80b96d337 100644 --- a/src/frontend/fit/standard_errors/hessian.jl +++ b/src/frontend/fit/standard_errors/hessian.jl @@ -35,20 +35,21 @@ function se_hessian(fit::SemFit; method = :finitediff) end # Addition functions ------------------------------------------------------------- -function H_scaling(model::AbstractSemSingle) - if length(model.loss.functions) > 1 - @warn "Hessian scaling for multiple loss functions is not implemented yet" - end - return H_scaling(model.loss.functions[1], model) -end - -H_scaling(lossfun::SemML, model::AbstractSemSingle) = 2 / (nsamples(model) - 1) +H_scaling(loss::SemML) = 2 / (nsamples(loss) - 1) -function H_scaling(lossfun::SemWLS, model::AbstractSemSingle) +function H_scaling(loss::SemWLS) @warn "Standard errors for WLS are only correct if a GLS weight matrix (the default) is used." - return 2 / (nsamples(model) - 1) + return 2 / (nsamples(loss) - 1) end -H_scaling(lossfun::SemFIML, model::AbstractSemSingle) = 2 / nsamples(model) +H_scaling(loss::SemFIML) = 2 / nsamples(loss) -H_scaling(model::SemEnsemble) = 2 / nsamples(model) +function H_scaling(model::AbstractSem) + semterms = SEM.sem_terms(model) + if length(semterms) > 1 + #@warn "Hessian scaling for multiple loss functions is not implemented yet" + return 2 / nsamples(model) + else + return length(semterms) >= 1 ? H_scaling(loss(semterms[1])) : 1.0 + end +end diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index 70bf6816c..f64cc46e1 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -33,8 +33,7 @@ function details(sem_fit::SemFit; show_fitmeasures = false, color = :light_cyan, key_length = length(string(k)) print(k) print(repeat(" ", goal_length - key_length)) - print(round(a[k]; digits = 2)) - print("\n") + @printf("%.3g\n", a[k]) end end print("\n") diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl new file mode 100644 index 000000000..91ea29699 --- /dev/null +++ b/src/frontend/predict.jl @@ -0,0 +1,119 @@ +abstract type SemScoresPredictMethod end + +struct SemRegressionScores <: SemScoresPredictMethod end +struct SemBartlettScores <: SemScoresPredictMethod end +struct SemAndersonRubinScores <: SemScoresPredictMethod end + +function SemScoresPredictMethod(method::Symbol) + if method == :regression + return SemRegressionScores() + elseif method == :Bartlett + return SemBartlettScores() + elseif method == :AndersonRubin + return SemAndersonRubinScores() + else + throw(ArgumentError("Unsupported prediction method: $method")) + end +end + +predict_latent_scores( + fit::SemFit, + data::SemObserved = observed(sem_term(fit.model)); + method::Symbol = :regression, +) = predict_latent_scores(SemScoresPredictMethod(method), fit, data) + +predict_latent_scores( + method::SemScoresPredictMethod, + fit::SemFit, + data::SemObserved = observed(sem_term(fit.model)), +) = predict_latent_scores(method, loss(sem_term(fit.model)), fit.solution, data) + +function inv_cov!(A::AbstractMatrix) + if istril(A) + A = LowerTriangular(A) + elseif istriu(A) + A = UpperTriangular(A) + else + end + A_chol = Cholesky(A) + return inv!(A_chol) +end + +function latent_scores_operator( + ::SemRegressionScores, + model::SemLoss, + params::AbstractVector, +) + implied = SEM.implied(model) + ram = implied.ram_matrices + 
+    lv_inds = latent_var_indices(ram)
+
+    A = materialize(ram.A, params)
+    lv_FA = ram.F * A[:, lv_inds]
+    lv_I_A⁻¹ = inv(I - A)[lv_inds, :]
+
+    S = materialize(ram.S, params)
+
+    cov_lv = lv_I_A⁻¹ * S * lv_I_A⁻¹'
+    Σ = implied.Σ
+    Σ⁻¹ = inv(Σ)
+    return cov_lv * lv_FA' * Σ⁻¹
+end
+
+function latent_scores_operator(::SemBartlettScores, model::SemLoss, params::AbstractVector)
+    implied = SEM.implied(model)
+    ram = implied.ram_matrices
+    lv_inds = latent_var_indices(ram)
+    A = materialize(ram.A, params)
+    lv_FA = ram.F * A[:, lv_inds]
+
+    S = materialize(ram.S, params)
+    obs_inds = observed_var_indices(ram)
+    ov_S⁻¹ = inv(S[obs_inds, obs_inds])
+
+    return inv(lv_FA' * ov_S⁻¹ * lv_FA) * lv_FA' * ov_S⁻¹
+end
+
+function predict_latent_scores(
+    method::SemScoresPredictMethod,
+    model::SemLoss,
+    params::AbstractVector,
+    data::SemObserved = observed(model),
+)
+    nobserved_vars(data) == nobserved_vars(model) || throw(
+        DimensionMismatch(
+            "Number of variables in data ($(nobserved_vars(data))) does not match the number of observed variables in the model ($(nobserved_vars(model)))",
+        ),
+    )
+    length(params) == nparams(model) || throw(
+        DimensionMismatch(
+            "The length of parameters vector ($(length(params))) does not match the number of parameters in the model ($(nparams(model)))",
+        ),
+    )
+
+    implied = SEM.implied(model)
+    update!(EvaluationTargets(0.0, nothing, nothing), implied, params)
+    ram = implied.ram_matrices
+    lv_inds = latent_var_indices(ram)
+    A = materialize(ram.A, params)
+    lv_I_A⁻¹ = inv(I - A)[lv_inds, :]
+
+    lv_scores_op = latent_scores_operator(method, model, params)
+
+    centered_data =
+        data.data .- (isnothing(data.obs_mean) ? mean(data.data, dims = 1) : data.obs_mean')
+    lv_scores = centered_data * lv_scores_op'
+    if MeanStruct(implied) === HasMeanStruct
+        M = materialize(ram.M, params)
+        lv_scores .+= (lv_I_A⁻¹ * M)'
+    end
+
+    return lv_scores
+end
+
+predict_latent_scores(
+    model::SemLoss,
+    params::AbstractVector,
+    data::SemObserved = observed(model);
+    method::Symbol = :regression,
+) = predict_latent_scores(SemScoresPredictMethod(method), model, params, data)
diff --git a/src/frontend/pretty_printing.jl b/src/frontend/pretty_printing.jl
index c1cd72c2f..10829c532 100644
--- a/src/frontend/pretty_printing.jl
+++ b/src/frontend/pretty_printing.jl
@@ -28,9 +28,11 @@ end
 # Loss Functions, Implied,
 ##############################################################
 
-function Base.show(io::IO, struct_inst::SemLossFunction)
-    print_type_name(io, struct_inst)
-    print_field_types(io, struct_inst)
+function Base.show(io::IO, sem::SemLoss)
+    println(io, "Structural Equation Model Loss ($(nameof(typeof(sem))))")
+    println(io, "- Observed: $(nameof(typeof(observed(sem)))) ($(nsamples(sem)) samples)")
+    println(io, "- Implied: $(nameof(typeof(implied(sem)))) ($(nparams(sem)) parameters)")
+    println(io, "- Variables: $(nobserved_vars(sem)) observed, $(nlatent_vars(sem)) latent")
 end
 
 function Base.show(io::IO, struct_inst::SemImplied)
diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl
index c5ad010b3..dff5b3ab0 100644
--- a/src/frontend/specification/ParameterTable.jl
+++ b/src/frontend/specification/ParameterTable.jl
@@ -243,6 +243,18 @@ See [sort_vars!](@ref) for in-place version.
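Editorial aside: a usage sketch for the new factor-score API, assuming a fitted model `fit` (hypothetical name). Note that `:AndersonRubin` is accepted by `SemScoresPredictMethod`, but this patch defines no `latent_scores_operator` method for it, so only `:regression` and `:Bartlett` will run.

```julia
# rows are samples, columns are latent variables
scores = predict_latent_scores(fit)                        # regression scores (default)
scores_b = predict_latent_scores(fit, method = :Bartlett)  # Bartlett scores

# scoring a new dataset with the same observed variables (hypothetical `new_observed`)
scores_new = predict_latent_scores(fit, new_observed)
```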
""" sort_vars(partable::ParameterTable) = sort_vars!(deepcopy(partable)) +function reorder_observed_vars!(partable::ParameterTable, new_order::AbstractVector{Symbol}) + # just check that it's 1-to-1 + source_to_dest_perm( + partable.observed_vars, + new_order, + one_to_one = true, + entities = "observed_vars", + ) + copy!(partable.observed_vars, new_order) + return partable +end + # add a row -------------------------------------------------------------------------------- function Base.push!(partable::ParameterTable, d::Union{AbstractDict{Symbol}, NamedTuple}) @@ -392,6 +404,18 @@ function update_se_hessian!( return update_partable!(partable, :se, params(fit), se) end +function variance_params(partable::ParameterTable) + res = [ + param for (param, rel, from, to) in zip( + partable.columns.param, + partable.columns.relation, + partable.columns.from, + partable.columns.to, + ) if (rel == :↔) && (from == to) + ] + unique!(res) +end + """ param_values!(out::AbstractVector, partable::ParameterTable, col::Symbol = :estimate) @@ -589,3 +613,78 @@ lavaan_param_values( lav_col, lav_group, ) + +""" + lavaan_model(partable::ParameterTable) + +Generate lavaan model definition from a `partable`. +""" +function lavaan_model(partable::ParameterTable) + latent_vars = Set(partable.variables.latent) + observed_vars = Set(partable.variables.observed) + + variance_defs = Dict{Symbol, IOBuffer}() + latent_dep_defs = Dict{Symbol, IOBuffer}() + latent_regr_defs = Dict{Symbol, IOBuffer}() + observed_regr_defs = Dict{Symbol, IOBuffer}() + + model = IOBuffer() + for (from, to, rel, param, value, free) in zip( + partable.columns.from, + partable.columns.to, + partable.columns.relation, + partable.columns.param, + partable.columns.value_fixed, + partable.columns.free, + ) + function append_param(io) + if free + @assert param != :const + param == Symbol("") || write(io, "$param * ") + else + write(io, "$value * ") + end + end + function append_rhs(io) + if position(io) > 0 + write(io, " + ") + end + append_param(io) + write(io, "$to") + end + + if from == Symbol("1") + write(model, "$to ~ ") + append_param(model) + write(model, "1\n") + else + if rel == :↔ + variance_def = get!(() -> IOBuffer(), variance_defs, from) + append_rhs(variance_def) + elseif rel == :→ + if (from ∈ latent_vars) && (to ∈ observed_vars) + latent_dep_def = get!(() -> IOBuffer(), latent_dep_defs, from) + append_rhs(latent_dep_def) + elseif (from ∈ latent_vars) && (to ∈ latent_vars) + latent_regr_def = get!(() -> IOBuffer(), latent_regr_defs, from) + append_rhs(latent_regr_def) + else + observed_regr_def = get!(() -> IOBuffer(), observed_regr_defs, from) + append_rhs(observed_regr_def) + end + end + end + end + function write_rules(io, defs, relation) + vars = sort!(collect(keys(defs))) + for var in vars + write(io, String(var), " ", relation, " ") + write(io, String(take!(defs[var])), "\n") + end + end + write_rules(model, latent_dep_defs, "=~") + write_rules(model, latent_regr_defs, "~") + write_rules(model, observed_regr_defs, "~") + write_rules(model, variance_defs, "~~") + return String(take!(model)) +end diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index 43fd87945..0507d4cf3 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -22,34 +22,23 @@ vars(ram::RAMMatrices) = ram.vars isobserved_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] > ram.F.colptr[i] islatent_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] == 
ram.F.colptr[i]
 
-# indices of observed variables in the order as they appear in ram.F rows
-function observed_var_indices(ram::RAMMatrices)
-    obs_inds = Vector{Int}(undef, nobserved_vars(ram))
-    @inbounds for i in 1:nvars(ram)
-        colptr = ram.F.colptr[i]
-        if ram.F.colptr[i+1] > colptr # is observed
-            obs_inds[ram.F.rowval[colptr]] = i
-        end
-    end
-    return obs_inds
-end
+# indices of observed variables; for order=:rows (default), in the order they appear in ram.F rows,
+# for order=:columns, in the order they appear in the combined variables list (ram.F columns)
+observed_var_indices(ram::RAMMatrices; order::Symbol = :rows) =
+    nzcols_eachrow_to_col(ram.F; order)
 
 latent_var_indices(ram::RAMMatrices) = [i for i in axes(ram.F, 2) if islatent_var(ram, i)]
 
-# observed variables in the order as they appear in ram.F rows
-function observed_vars(ram::RAMMatrices)
+# observed variables; for order=:rows, in the order they appear in ram.F rows,
+# for order=:columns, in the order they appear in the combined variables list (ram.F columns)
+function observed_vars(ram::RAMMatrices; order::Symbol = :rows)
+    order ∈ [:rows, :columns] ||
+        throw(ArgumentError("order kwarg should be :rows or :columns"))
     if isnothing(ram.vars)
         @warn "Your RAMMatrices do not contain variable names. Please make sure the order of variables in your data is correct!"
         return nothing
     else
-        obs_vars = Vector{Symbol}(undef, nobserved_vars(ram))
-        @inbounds for (i, v) in enumerate(vars(ram))
-            colptr = ram.F.colptr[i]
-            if ram.F.colptr[i+1] > colptr # is observed
-                obs_vars[ram.F.rowval[colptr]] = v
-            end
-        end
-        return obs_vars
+        return nzcols_eachrow_to_col(Base.Fix1(getindex, vars(ram)), ram.F; order = order)
     end
 end
 
@@ -62,6 +51,17 @@ function latent_vars(ram::RAMMatrices)
     end
 end
 
+function variance_params(ram::RAMMatrices)
+    S_diaginds = Set(diagind(ram.S))
+    varparams = Vector{Symbol}()
+    for (i, param) in enumerate(ram.params)
+        if any(∈(S_diaginds), param_occurences(ram.S, i))
+            push!(varparams, param)
+        end
+    end
+    return unique!(varparams)
+end
+
 ############################################################################################
 ### Constructor
 ############################################################################################
@@ -113,6 +113,17 @@ function RAMMatrices(;
     return RAMMatrices(A, S, F, M, copy(params), vars)
 end
 
+# copy RAMMatrices replacing the parameters vector
+# (e.g. when reordering parameters or adding new parameters to the ensemble model)
+RAMMatrices(ram::RAMMatrices; params::AbstractVector{Symbol}) = RAMMatrices(;
+    A = materialize(ram.A, SEM.params(ram)),
+    S = materialize(ram.S, SEM.params(ram)),
+    F = copy(ram.F),
+    M = !isnothing(ram.M) ? materialize(ram.M, SEM.params(ram)) : nothing,
+    params,
+    vars = ram.vars,
+)
+
 ############################################################################################
 ### get RAMMatrices from parameter table
 ############################################################################################
@@ -221,13 +232,7 @@ function RAMMatrices(
     return RAMMatrices(
         ParamsMatrix{T}(A_inds, A_consts, (n_vars, n_vars)),
         ParamsMatrix{T}(S_inds, S_consts, (n_vars, n_vars)),
-        sparse(
-            1:n_observed,
-            [vars_index[var] for var in partable.observed_vars],
-            ones(T, n_observed),
-            n_observed,
-            n_vars,
-        ),
+        eachrow_to_col(T, [vars_index[var] for var in partable.observed_vars], n_vars),
         !isnothing(M_inds) ?
ParamsVector{T}(M_inds, M_consts, (n_vars,)) : nothing, params, vars_sorted, @@ -240,6 +245,19 @@ Base.convert( params::Union{AbstractVector{Symbol}, Nothing} = nothing, ) = RAMMatrices(partable; params) +# reorders the observed variables in the RAMMatrices, i.e. the order of the rows in F +function reorder_observed_vars!(ram::RAMMatrices, new_order::AbstractVector{Symbol}) + # just check that it's 1-to-1 + src2dest = source_to_dest_perm( + observed_vars(ram), + new_order, + one_to_one = true, + entities = "observed_vars", + ) + copy!(ram.F, ram.F[src2dest, :]) + return ram +end + ############################################################################################ ### get parameter table from RAMMatrices ############################################################################################ diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 33440e257..75ac73ead 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -1,7 +1,147 @@ +losstype(::Type{<:LossTerm{L, W, I}}) where {L, W, I} = L +losstype(term::LossTerm) = losstype(typeof(term)) +loss(term::LossTerm) = term.loss +weight(term::LossTerm) = term.weight +id(term::LossTerm) = term.id + +""" + issemloss(term::LossTerm) -> Bool + +Check if a SEM model term is a SEM loss function ([@ref SemLoss]). +""" +issemloss(term::LossTerm) = isa(loss(term), SemLoss) + +for f in ( + :implied, + :observed, + :nsamples, + :observed_vars, + :nobserved_vars, + :vars, + :nvars, + :latent_vars, + :nlatent_vars, + :params, + :nparams, +) + @eval $f(term::LossTerm) = $f(loss(term)) +end + +function Base.show(io::IO, term::LossTerm) + if !isnothing(id(term)) + print(io, ":$(id(term)): ") + end + print(io, nameof(losstype(term))) + if issemloss(term) + print( + io, + " ($(nsamples(term)) samples, $(nobserved_vars(term)) observed, $(nlatent_vars(term)) latent variables)", + ) + end + if !isnothing(weight(term)) + @printf(io, " w=%.3g", weight(term)) + else + print(io, " w=1") + end +end + ############################################################################################ # constructor for Sem types ############################################################################################ +function Sem( + loss_terms...; + params::Union{Vector{Symbol}, Nothing} = nothing, + default_sem_weights = :nsamples, +) + default_sem_weights ∈ [:nsamples, :uniform, :one] || + throw(ArgumentError("Unsupported default_sem_weights=:$default_sem_weights")) + # assemble a list of weighted losses and check params equality + terms = Vector{LossTerm}() + params = !isnothing(params) ? 
copy(params) : params + has_sem_weights = false + nsems = 0 + for inp_term in loss_terms + if inp_term isa AbstractLoss + term = inp_term + term_w = nothing + term_id = nothing + elseif inp_term isa Pair + if inp_term[1] isa AbstractLoss + term, term_w = inp_term + term_id = nothing + elseif inp_term[2] isa AbstractLoss + term_id, term = inp_term + term_w = nothing + elseif inp_term[2] isa Pair + term_id, (term, term_w) = inp_term + isa(term, AbstractLoss) || throw( + ArgumentError( + "AbstractLoss expected as a second argument of a loss term double pair (id => loss => weight), $(nameof(typeof(term))) found", + ), + ) + end + elseif inp_term isa LossTerm + term_id = id(inp_term) + term = loss(inp_term) + term_w = weight(inp_term) + else + throw( + ArgumentError( + "[id =>] AbstractLoss [=> weight] expected as a loss term, $(nameof(typeof(inp_term))) found", + ), + ) + end + + if term isa SemLoss + nsems += 1 + has_sem_weights |= !isnothing(term_w) + # check integrity + if isnothing(params) + params = SEM.params(term) + elseif params != SEM.params(term) + # FIXME the suggestion might no longer be relevant, since ParTable also stores params order + error("The parameters of your SEM models do not match.\n +Maybe you tried to specify models of an ensemble via ParameterTables.\n +In that case, you may use RAMMatrices instead.") + end + check_observed_vars(term) + elseif !(term isa AbstractLoss) + throw( + ArgumentError( + "AbstractLoss term expected at $(length(terms)+1) position, $(nameof(typeof(term))) found", + ), + ) + end + push!(terms, LossTerm(term, term_id, term_w)) + end + isnothing(params) && throw(ErrorException("No SEM models provided.")) + + if !has_sem_weights && nsems > 1 + # set the weights of SEMs in the ensemble + if default_sem_weights == :nsamples + # weight SEM by the number of samples + nsamples_total = sum(nsamples(term) for term in terms if issemloss(term)) + for (i, term) in enumerate(terms) + if issemloss(term) + terms[i] = + LossTerm(loss(term), id(term), nsamples(term) / nsamples_total) + end + end + elseif default_sem_weights == :uniform # uniform weights + for (i, term) in enumerate(terms) + if issemloss(term) + terms[i] = LossTerm(loss(term), id(term), 1 / nsems) + end + end + elseif default_sem_weights == :one # do nothing + end + end + + terms_tuple = Tuple(terms) + return Sem{typeof(terms_tuple)}(terms_tuple, params) +end + function Sem(; specification = ParameterTable, observed::O = SemObservedData, @@ -13,85 +153,107 @@ function Sem(; set_field_type_kwargs!(kwdict, observed, implied, loss, O, I) - observed, implied, loss = get_fields!(kwdict, specification, observed, implied, loss) - - sem = Sem(observed, implied, loss) + loss = get_fields!(kwdict, specification, observed, implied, loss) - return sem + return Sem(loss...) end +############################################################################################ +# functions +############################################################################################ + +params(model::AbstractSem) = model.params + """ - implied(model::AbstractSemSingle) -> SemImplied + loss_terms(model::AbstractSem) + +Returns a tuple of all [`LossTerm`](@ref) weighted terms in the SEM model. -Returns the [*implied*](@ref SemImplied) part of a model. +See also [`sem_terms`](@ref), [`loss_term`](@ref). 
""" -implied(model::AbstractSemSingle) = model.implied +loss_terms(model::AbstractSem) = model.loss_terms +nloss_terms(model::AbstractSem) = length(loss_terms(model)) -nvars(model::AbstractSemSingle) = nvars(implied(model)) -nobserved_vars(model::AbstractSemSingle) = nobserved_vars(implied(model)) -nlatent_vars(model::AbstractSemSingle) = nlatent_vars(implied(model)) +""" + sem_terms(model::AbstractSem) -vars(model::AbstractSemSingle) = vars(implied(model)) -observed_vars(model::AbstractSemSingle) = observed_vars(implied(model)) -latent_vars(model::AbstractSemSingle) = latent_vars(implied(model)) +Returns a tuple of all weighted SEM terms in the SEM model. -params(model::AbstractSemSingle) = params(implied(model)) -nparams(model::AbstractSemSingle) = nparams(implied(model)) +In comparison to [`loss_terms`](@ref) that returns all model terms, including e.g. +regularization terms, this function returns only the [`SemLoss`] terms. +See also [`loss_terms`](@ref), [`sem_term`](@ref). """ - observed(model::AbstractSemSingle) -> SemObserved +sem_terms(model::AbstractSem) = Tuple(term for term in loss_terms(model) if issemloss(term)) +nsem_terms(model::AbstractSem) = sum(issemloss, loss_terms(model)) + +nsamples(model::AbstractSem) = + sum(term -> issemloss(term) ? nsamples(term) : 0, loss_terms(model)) -Returns the [*observed*](@ref SemObserved) part of a model. """ -observed(model::AbstractSemSingle) = model.observed + loss_term(model::AbstractSem, id::Any) -> AbstractLoss -nsamples(model::AbstractSemSingle) = nsamples(observed(model)) +Returns the loss term with the specified `id` from the `model`. +Throws an error if the model has no term with the specified `id`. +See also [`loss_terms`](@ref). """ - loss(model::AbstractSemSingle) -> SemLoss +function loss_term(model::AbstractSem, id::Any) + for term in loss_terms(model) + if SEM.id(term) == id + return loss(term) + end + end + error("No loss term with id=$id found") +end -Returns the [*loss*](@ref SemLoss) function of a model. """ -loss(model::AbstractSemSingle) = model.loss + sem_term(model::AbstractSem, [id]) -> SemLoss -# sum of samples in all sub-models -nsamples(ensemble::SemEnsemble) = sum(nsamples, ensemble.sems) +Returns the SEM loss term with the specified `id` from the `model`. +Throws an error if the model has no term with the specified `id` or +if it is not of a [`SemLoss`](@ref) type. -function SemFiniteDiff(; - specification = ParameterTable, - observed::O = SemObservedData, - implied::I = RAM, - loss::L = SemML, - kwargs..., -) where {O, I, L} - kwdict = Dict{Symbol, Any}(kwargs...) - - set_field_type_kwargs!(kwdict, observed, implied, loss, O, I) +If no `id` is specified and the model contains only one SEM term, the term is returned. +Throws an error if the model contains multiple SEM terms. - observed, implied, loss = get_fields!(kwdict, specification, observed, implied, loss) - - sem = SemFiniteDiff(observed, implied, loss) +See also [`loss_term`](@ref), [`sem_terms`](@ref). 
+""" +function sem_term(model::AbstractSem, id::Any) + term = loss_term(model, id) + issemloss(term) || error("Loss term with id=$id ($(typeof(term))) is not a SEM term") + return term +end - return sem +function sem_term(model::AbstractSem, id::Nothing = nothing) + if nsem_terms(model) != 1 + error( + "Model contains $(nsem_terms(model)) SEM terms, you have to specify a specific term", + ) + end + for term in loss_terms(model) + issemloss(term) && return loss(term) + end + error("Unreachable reached") end -############################################################################################ -# functions -############################################################################################ +# wrappers arounds a single SemLoss term +observed(model::AbstractSem, id::Nothing = nothing) = observed(sem_term(model, id)) +implied(model::AbstractSem, id::Nothing = nothing) = implied(sem_term(model, id)) +vars(model::AbstractSem, id::Nothing = nothing) = vars(implied(model, id)) +observed_vars(model::AbstractSem, id::Nothing = nothing) = observed_vars(implied(model, id)) +latent_vars(model::AbstractSem, id::Nothing = nothing) = latent_vars(implied(model, id)) function set_field_type_kwargs!(kwargs, observed, implied, loss, O, I) kwargs[:observed_type] = O <: Type ? observed : typeof(observed) kwargs[:implied_type] = I <: Type ? implied : typeof(implied) if loss isa SemLoss - kwargs[:loss_types] = [ - lossfun isa SemLossFunction ? typeof(lossfun) : lossfun for - lossfun in loss.functions - ] - elseif applicable(iterate, loss) kwargs[:loss_types] = - [lossfun isa SemLossFunction ? typeof(lossfun) : lossfun for lossfun in loss] + [aloss isa SemLoss ? typeof(aloss) : aloss for aloss in loss.functions] + elseif applicable(iterate, loss) + kwargs[:loss_types] = [aloss isa SemLoss ? typeof(aloss) : aloss for aloss in loss] else - kwargs[:loss_types] = [loss isa SemLossFunction ? typeof(loss) : loss] + kwargs[:loss_types] = [loss isa SemLoss ? typeof(loss) : loss] end end @@ -105,104 +267,75 @@ function get_fields!(kwargs, specification, observed, implied, loss) if !isa(observed, SemObserved) observed = observed(; specification, kwargs...) end - kwargs[:observed] = observed # implied if !isa(implied, SemImplied) - implied = implied(; specification, kwargs...) + # FIXME remove this implicit logic + # SemWLS only accepts vech-ed implied covariance + if isa(loss, Type) && (loss <: SemWLS) && !haskey(kwargs, :vech) + implied_kwargs = copy(kwargs) + implied_kwargs[:vech] = true + else + implied_kwargs = kwargs + end + implied = implied(specification; implied_kwargs...) + end + + if observed_vars(observed) != observed_vars(implied) + throw(ArgumentError("observed_vars differ between the observed and the implied")) end - kwargs[:implied] = implied kwargs[:nparams] = nparams(implied) # loss - loss = get_SemLoss(loss; specification, kwargs...) + loss = get_SemLoss(loss, observed, implied; kwargs...) kwargs[:loss] = loss - return observed, implied, loss + return loss end # construct loss field -function get_SemLoss(loss; kwargs...) +function get_SemLoss(loss, observed, implied; kwargs...) if loss isa SemLoss - nothing + return loss elseif applicable(iterate, loss) - loss_out = [] - for lossfun in loss - if isa(lossfun, SemLossFunction) - push!(loss_out, lossfun) + loss_out = AbstractLoss[] + for aloss in loss + if isa(aloss, AbstractLoss) + push!(loss_out, aloss) + elseif aloss <: SemLoss{O, I} where {O, I} + res = aloss(observed, implied; kwargs...) 
+ push!(loss_out, res) else - lossfun = lossfun(; kwargs...) - push!(loss_out, lossfun) + res = aloss(; kwargs...) + push!(loss_out, res) end end - loss = SemLoss(loss_out...; kwargs...) + return Tuple(loss_out) else - if !isa(loss, SemLossFunction) - loss = SemLoss(loss(; kwargs...); kwargs...) - else - loss = SemLoss(loss; kwargs...) - end + return (loss(observed, implied; kwargs...),) end - return loss +end + +function update_observed(sem::Sem, new_observed; kwargs...) + new_terms = Tuple( + update_observed(lossterm.loss, new_observed; kwargs...) for + lossterm in loss_terms(sem) + ) + return Sem(new_terms...) end ############################################################## # pretty printing ############################################################## -#= function Base.show(io::IO, sem::Sem{O, I, L, D}) where {O, I, L, D} - lossfuntypes = @. nameof(typeof(sem.loss.functions)) - print(io, "Sem{$(nameof(O)), $(nameof(I)), $lossfuntypes, $(nameof(D))}") -end =# - -function Base.show(io::IO, sem::Sem{O, I, L}) where {O, I, L} - lossfuntypes = @. string(nameof(typeof(sem.loss.functions))) - lossfuntypes = " " .* lossfuntypes .* ("\n") - print(io, "Structural Equation Model \n") - print(io, "- Loss Functions \n") - print(io, lossfuntypes...) - print(io, "- Fields \n") - print(io, " observed: $(nameof(O)) \n") - print(io, " implied: $(nameof(I)) \n") -end - -function Base.show(io::IO, sem::SemFiniteDiff{O, I, L}) where {O, I, L} - lossfuntypes = @. string(nameof(typeof(sem.loss.functions))) - lossfuntypes = " " .* lossfuntypes .* ("\n") - print(io, "Structural Equation Model : Finite Diff Approximation\n") - print(io, "- Loss Functions \n") - print(io, lossfuntypes...) - print(io, "- Fields \n") - print(io, " observed: $(nameof(O)) \n") - print(io, " implied: $(nameof(I)) \n") -end - -function Base.show(io::IO, loss::SemLoss) - lossfuntypes = @. string(nameof(typeof(loss.functions))) - lossfuntypes = " " .* lossfuntypes .* ("\n") - print(io, "SemLoss \n") - print(io, "- Loss Functions \n") - print(io, lossfuntypes...) - print(io, "- Weights \n") - for weight in loss.weights - if isnothing(weight.w) - print(io, " one \n") - else - print(io, "$(round.(weight.w, digits = 2)) \n") - end - end -end - -function Base.show(io::IO, models::SemEnsemble) - print(io, "SemEnsemble \n") - print(io, "- Number of Models: $(models.n) \n") - print(io, "- Weights: $(round.(models.weights, digits = 2)) \n") - - print(io, "\n", "Models: \n") - print(io, "===============================================", "\n") - for (model, i) in zip(models.sems, 1:models.n) - print(io, "---------------------- ", i, " ----------------------", "\n") - print(io, model) +function Base.show(io::IO, sem::AbstractSem) + println(io, "Structural Equation Model ($(nameof(typeof(sem))))") + println(io, "- $(nparams(sem)) parameters") + println(io, "- Loss terms:") + for term in loss_terms(sem) + print(io, " - ") + print(io, term) + println(io) end end diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl index 30bd29bf4..778c77aca 100644 --- a/src/implied/RAM/generic.jl +++ b/src/implied/RAM/generic.jl @@ -8,13 +8,11 @@ Model implied covariance and means via RAM notation. RAM(; specification, - meanstructure = false, gradient = true, kwargs...) # Arguments - `specification`: either a `RAMMatrices` or `ParameterTable` object -- `meanstructure::Bool`: does the model have a meanstructure? 
- `gradient::Bool`: is gradient-based optimization used # Extended help @@ -60,7 +58,6 @@ Additional interfaces - `F⨉I_A⁻¹(::RAM)` -> ``F(I-A)^{-1}`` - `F⨉I_A⁻¹S(::RAM)` -> ``F(I-A)^{-1}S`` - `I_A(::RAM)` -> ``I-A`` -- `has_meanstructure(::RAM)` -> `Val{Bool}` does the model have a meanstructure? Only available in gradient! calls: - `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` @@ -95,14 +92,14 @@ end ### Constructors ############################################################################################ -function RAM(; - specification::SemSpecification, +function RAM( + spec::SemSpecification; #vech = false, gradient_required = true, - meanstructure = false, + sparse_S::Bool = true, kwargs..., ) - ram_matrices = convert(RAMMatrices, specification) + ram_matrices = convert(RAMMatrices, spec) # get dimensions of the model n_par = nparams(ram_matrices) @@ -112,7 +109,9 @@ function RAM(; #preallocate arrays rand_params = randn(Float64, n_par) A_pre = check_acyclic(materialize(ram_matrices.A, rand_params)) - S_pre = materialize(ram_matrices.S, rand_params) + S_pre = Symmetric( + (sparse_S ? sparse_materialize : materialize)(ram_matrices.S, rand_params), + ) F = copy(ram_matrices.F) # pre-allocate some matrices @@ -130,13 +129,8 @@ function RAM(; end # μ - if meanstructure + if !isnothing(ram_matrices.M) MS = HasMeanStruct - !isnothing(ram_matrices.M) || throw( - ArgumentError( - "You set `meanstructure = true`, but your model specification contains no mean parameters.", - ), - ) M_pre = materialize(ram_matrices.M, rand_params) ∇M = gradient_required ? sparse_gradient(ram_matrices.M) : nothing μ = zeros(n_obs) @@ -158,7 +152,7 @@ function RAM(; F⨉I_A⁻¹, F⨉I_A⁻¹S, I_A, - copy(I_A), + similar(I_A), ∇A, ∇S, ∇M, @@ -169,7 +163,7 @@ end ### methods ############################################################################################ -function update!(targets::EvaluationTargets, implied::RAM, model::AbstractSemSingle, params) +function update!(targets::EvaluationTargets, implied::RAM, params) materialize!(implied.A, implied.ram_matrices.A, params) materialize!(implied.S, implied.ram_matrices.S, params) if !isnothing(implied.M) diff --git a/src/implied/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl index 44ad4949d..443dc72c1 100644 --- a/src/implied/RAM/symbolic.jl +++ b/src/implied/RAM/symbolic.jl @@ -11,12 +11,10 @@ Subtype of `SemImplied` that implements the RAM notation with symbolic precomput gradient = true, hessian = false, approximate_hessian = false, - meanstructure = false, kwargs...) # Arguments - `specification`: either a `RAMMatrices` or `ParameterTable` object -- `meanstructure::Bool`: does the model have a meanstructure? - `gradient::Bool`: is gradient-based optimization used - `hessian::Bool`: is hessian-based optimization used - `approximate_hessian::Bool`: for hessian based optimization: should the hessian be approximated @@ -39,8 +37,8 @@ Jacobians (only available in gradient! calls) - `∇Σ(::RAMSymbolic)` -> ``∂vec(Σ)/∂θᵀ`` - `∇μ(::RAMSymbolic)` -> ``∂μ/∂θᵀ`` -- `∇Σ_function(::RAMSymbolic)` -> function to overwrite `∇Σ` in place, - i.e. `∇Σ_function(∇Σ, θ)`. Normally, you do not want to use this but simply +- `∇Σ_eval!(::RAMSymbolic)` -> function to evaluate `∇Σ` in place, + i.e. `∇Σ_eval!(∇Σ, θ)`. Normally, you do not want to use this but simply query `∇Σ(::RAMSymbolic)`. Hessians @@ -49,9 +47,6 @@ hessian matrices". Therefore, we desribe it at length in the mathematical appendix of the online documentation, and the relevant interfaces are omitted here. 
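Editorial aside: the RAM-implied moments that both implied backends produce can be checked by hand. A self-contained toy sketch (one factor, three indicators, made-up values):

```julia
using LinearAlgebra

λ = [1.0, 0.8, 0.6]                 # loadings
A = zeros(4, 4); A[1:3, 4] = λ      # directed paths: factor (4) -> indicators (1:3)
S = Diagonal([0.5, 0.4, 0.3, 1.0])  # residual variances and the factor variance
F = [I(3) zeros(3)]                 # filter matrix selecting the observed rows

I_A⁻¹ = inv(I - A)
Σ = F * I_A⁻¹ * S * I_A⁻¹' * F'     # 3×3 model-implied covariance
```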
-Additional interfaces -- `has_meanstructure(::RAMSymbolic)` -> `Val{Bool}` does the model have a meanstructure? - ## RAM notation The model implied covariance matrix is computed as ```math @@ -62,23 +57,19 @@ and for models with a meanstructure, the model implied means are computed as \mu = F(I-A)^{-1}M ``` """ -struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, S1, S2, S3, V2, F4, A4, F5, A5} <: - SemImpliedSymbolic +struct RAMSymbolic{MS, F1, F2, F3, A1, A2, A3, V2, F4, A4, F5, A5} <: SemImpliedSymbolic meanstruct::MS hessianeval::ExactHessian - Σ_function::F1 - ∇Σ_function::F2 - ∇²Σ_function::F3 + Σ_eval!::F1 + ∇Σ_eval!::F2 + ∇²Σ_eval!::F3 Σ::A1 ∇Σ::A2 ∇²Σ::A3 - Σ_symbolic::S1 - ∇Σ_symbolic::S2 - ∇²Σ_symbolic::S3 ram_matrices::V2 - μ_function::F4 + μ_eval!::F4 μ::A4 - ∇μ_function::F5 + ∇μ_eval!::F5 ∇μ::A5 RAMSymbolic{MS}(args...) where {MS <: MeanStruct} = @@ -89,18 +80,16 @@ end ### Constructors ############################################################################################ -function RAMSymbolic(; - specification::SemSpecification, - loss_types = nothing, - vech = false, - simplify_symbolics = false, - gradient = true, - hessian = false, - meanstructure = false, - approximate_hessian = false, +function RAMSymbolic( + spec::SemSpecification; + vech::Bool = false, + simplify_symbolics::Bool = false, + gradient::Bool = true, + hessian::Bool = false, + approximate_hessian::Bool = false, kwargs..., ) - ram_matrices = convert(RAMMatrices, specification) + ram_matrices = convert(RAMMatrices, spec) n_par = nparams(ram_matrices) par = (Symbolics.@variables θ[1:n_par])[1] @@ -110,88 +99,78 @@ function RAMSymbolic(; M = !isnothing(ram_matrices.M) ? materialize(Num, ram_matrices.M, par) : nothing F = ram_matrices.F - if !isnothing(loss_types) && any(T -> T <: SemWLS, loss_types) - vech = true - end - I_A⁻¹ = neumann_series(A) # Σ - Σ_symbolic = eval_Σ_symbolic(S, I_A⁻¹, F; vech = vech, simplify = simplify_symbolics) - #print(Symbolics.build_function(Σ_symbolic)[2]) - Σ_function = Symbolics.build_function(Σ_symbolic, par, expression = Val{false})[2] - Σ = zeros(size(Σ_symbolic)) - precompile(Σ_function, (typeof(Σ), Vector{Float64})) + Σ_sym = eval_Σ_symbolic(S, I_A⁻¹, F; vech, simplify = simplify_symbolics) + #print(Symbolics.build_function(Σ_sym)[2]) + Σ_eval! = Symbolics.build_function(Σ_sym, par, expression = Val{false})[2] + Σ = zeros(size(Σ_sym)) + precompile(Σ_eval!, (typeof(Σ), Vector{Float64})) # ∇Σ if gradient - ∇Σ_symbolic = Symbolics.sparsejacobian(vec(Σ_symbolic), [par...]) - ∇Σ_function = Symbolics.build_function(∇Σ_symbolic, par, expression = Val{false})[2] - constr = findnz(∇Σ_symbolic) - ∇Σ = sparse(constr[1], constr[2], fill(1.0, nnz(∇Σ_symbolic)), size(∇Σ_symbolic)...) - precompile(∇Σ_function, (typeof(∇Σ), Vector{Float64})) + ∇Σ_sym = Symbolics.sparsejacobian(vec(Σ_sym), [par...]) + ∇Σ_eval! = Symbolics.build_function(∇Σ_sym, par, expression = Val{false})[2] + constr = findnz(∇Σ_sym) + ∇Σ = sparse(constr[1], constr[2], fill(1.0, nnz(∇Σ_sym)), size(∇Σ_sym)...) + precompile(∇Σ_eval!, (typeof(∇Σ), Vector{Float64})) else - ∇Σ_symbolic = nothing - ∇Σ_function = nothing + ∇Σ_eval! 
= nothing ∇Σ = nothing end if hessian && !approximate_hessian - n_sig = length(Σ_symbolic) - ∇²Σ_symbolic_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_symbolic)] + n_sig = length(Σ_sym) + ∇²Σ_sym_vec = [Symbolics.sparsehessian(σᵢ, [par...]) for σᵢ in vec(Σ_sym)] @variables J[1:n_sig] - ∇²Σ_symbolic = zeros(Num, n_par, n_par) + ∇²Σ_sym = zeros(Num, n_par, n_par) for i in 1:n_sig - ∇²Σ_symbolic += J[i] * ∇²Σ_symbolic_vec[i] + ∇²Σ_sym += J[i] * ∇²Σ_sym_vec[i] end - ∇²Σ_function = - Symbolics.build_function(∇²Σ_symbolic, J, par, expression = Val{false})[2] + ∇²Σ_eval! = Symbolics.build_function(∇²Σ_sym, J, par, expression = Val{false})[2] ∇²Σ = zeros(n_par, n_par) else - ∇²Σ_symbolic = nothing - ∇²Σ_function = nothing + ∇²Σ_sym = nothing + ∇²Σ_eval! = nothing ∇²Σ = nothing end # μ - if meanstructure + if !isnothing(ram_matrices.M) MS = HasMeanStruct - μ_symbolic = eval_μ_symbolic(M, I_A⁻¹, F; simplify = simplify_symbolics) - μ_function = Symbolics.build_function(μ_symbolic, par, expression = Val{false})[2] - μ = zeros(size(μ_symbolic)) + μ_sym = eval_μ_symbolic(M, I_A⁻¹, F; simplify = simplify_symbolics) + μ_eval! = Symbolics.build_function(μ_sym, par, expression = Val{false})[2] + μ = zeros(size(μ_sym)) if gradient - ∇μ_symbolic = Symbolics.jacobian(μ_symbolic, [par...]) - ∇μ_function = - Symbolics.build_function(∇μ_symbolic, par, expression = Val{false})[2] + ∇μ_sym = Symbolics.jacobian(μ_sym, [par...]) + ∇μ_eval! = Symbolics.build_function(∇μ_sym, par, expression = Val{false})[2] ∇μ = zeros(size(F, 1), size(par, 1)) else - ∇μ_function = nothing + ∇μ_eval! = nothing ∇μ = nothing end else MS = NoMeanStruct - μ_function = nothing + μ_eval! = nothing μ = nothing - ∇μ_function = nothing + ∇μ_eval! = nothing ∇μ = nothing end return RAMSymbolic{MS}( - Σ_function, - ∇Σ_function, - ∇²Σ_function, + Σ_eval!, + ∇Σ_eval!, + ∇²Σ_eval!, Σ, ∇Σ, ∇²Σ, - Σ_symbolic, - ∇Σ_symbolic, - ∇²Σ_symbolic, ram_matrices, - μ_function, + μ_eval!, μ, - ∇μ_function, + ∇μ_eval!, ∇μ, ) end @@ -200,21 +179,16 @@ end ### objective, gradient, hessian ############################################################################################ -function update!( - targets::EvaluationTargets, - implied::RAMSymbolic, - model::AbstractSemSingle, - par, -) - implied.Σ_function(implied.Σ, par) +function update!(targets::EvaluationTargets, implied::RAMSymbolic, par) + implied.Σ_eval!(implied.Σ, par) if MeanStruct(implied) === HasMeanStruct - implied.μ_function(implied.μ, par) + implied.μ_eval!(implied.μ, par) end if is_gradient_required(targets) || is_hessian_required(targets) - implied.∇Σ_function(implied.∇Σ, par) + implied.∇Σ_eval!(implied.∇Σ, par) if MeanStruct(implied) === HasMeanStruct - implied.∇μ_function(implied.∇μ, par) + implied.∇μ_eval!(implied.∇μ, par) end end end @@ -236,10 +210,10 @@ end ############################################################################################ # expected covariations of observed vars -function eval_Σ_symbolic(S, I_A⁻¹, F; vech = false, simplify = false) +function eval_Σ_symbolic(S, I_A⁻¹, F; vech::Bool = false, simplify::Bool = false) Σ = F * I_A⁻¹ * S * permutedims(I_A⁻¹) * permutedims(F) Σ = Array(Σ) - vech && (Σ = Σ[tril(trues(size(F, 1), size(F, 1)))]) + vech && (Σ = SEM.vech(Σ)) if simplify Threads.@threads for i in eachindex(Σ) Σ[i] = Symbolics.simplify(Σ[i]) diff --git a/src/implied/empty.jl b/src/implied/empty.jl index e87dc72d1..6e10bf26c 100644 --- a/src/implied/empty.jl +++ b/src/implied/empty.jl @@ -43,7 +43,7 @@ end ### methods 
############################################################################################ -update!(targets::EvaluationTargets, implied::ImpliedEmpty, par, model) = nothing +update!(targets::EvaluationTargets, implied::ImpliedEmpty, par) = nothing ############################################################################################ ### Recommended methods diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 0ef542f70..455c59ee8 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -1,20 +1,81 @@ ############################################################################################ ### Types ############################################################################################ + +# state of SemFIML for a specific missing pattern (`SemObservedMissingPattern` type) +struct SemFIMLPattern{T} + ∇ind::Vector{Int} # indices of co-observed variable pairs + Σ⁻¹::Matrix{T} # preallocated inverse of implied cov + logdet::Ref{T} # logdet of implied cov + μ_diff::Vector{T} # implied mean difference +end + +# allocate arrays for pattern FIML +function SemFIMLPattern(pat::SemObservedMissingPattern) + nobs_vars = nobserved_vars(pat) + nmeas_vars = nmeasured_vars(pat) + + # generate linear indicies of co-observed variable pairs for each pattern + Σ_linind = LinearIndices((nobs_vars, nobs_vars)) + pat_vars = findall(pat.measured_mask) + ∇ind = vec(Σ_linind[pat_vars, pat_vars]) + + return SemFIMLPattern(∇ind, zeros(nmeas_vars, nmeas_vars), Ref(NaN), zeros(nmeas_vars)) +end + +function prepare!(fiml::SemFIMLPattern, pat::SemObservedMissingPattern, Σ::AbstractMatrix, μ::AbstractVector) + @inbounds @. @views begin + fiml.Σ⁻¹ = Σ[pat.measured_mask, pat.measured_mask] + fiml.μ_diff = pat.measured_mean - μ[pat.measured_mask] + end + Σ_chol = cholesky!(Symmetric(fiml.Σ⁻¹)) + fiml.logdet[] = logdet(Σ_chol) + LinearAlgebra.inv!(Σ_chol) # updates fiml.Σ⁻¹ + return fiml +end + +prepare!(fiml::SemFIMLPattern, pat::SemObservedMissingPattern, implied::SemImplied) = + prepare!(fiml, pat, implied.Σ, implied.μ) + +function objective(fiml::SemFIMLPattern{T}, pat::SemObservedMissingPattern) where {T} + F = fiml.logdet[] + dot(fiml.μ_diff, fiml.Σ⁻¹, fiml.μ_diff) + if nsamples(pat) > 1 + F += dot(pat.measured_cov, fiml.Σ⁻¹) + F *= nsamples(pat) + end + return F +end + +function gradient!(JΣ, Jμ, fiml::SemFIMLPattern, pat::SemObservedMissingPattern) + Σ⁻¹ = Symmetric(fiml.Σ⁻¹) + μ_diff⨉Σ⁻¹ = fiml.μ_diff' * Σ⁻¹ + if nsamples(pat) > 1 + JΣ_pat = Σ⁻¹ * (I - pat.measured_cov * Σ⁻¹ - fiml.μ_diff * μ_diff⨉Σ⁻¹) + JΣ_pat .*= nsamples(pat) + else + JΣ_pat = Σ⁻¹ * (I - fiml.μ_diff * μ_diff⨉Σ⁻¹) + end + @inbounds vec(JΣ)[fiml.∇ind] .+= vec(JΣ_pat) + + lmul!(2 * nsamples(pat), μ_diff⨉Σ⁻¹) + @inbounds Jμ[pat.measured_mask] .+= μ_diff⨉Σ⁻¹' + return nothing +end + """ Full information maximum likelihood estimation. Can handle observed data with missings. # Constructor - SemFIML(;observed, specification, kwargs...) + SemFIML(observed::SemObservedMissing, implied::SemImplied) # Arguments - `observed::SemObservedMissing`: the observed part of the model -- `specification`: either a `RAMMatrices` or `ParameterTable` object +- `implied::SemImplied`: [`SemImplied`](@ref) instance # Examples ```julia -my_fiml = SemFIML(observed = my_observed, specification = my_parameter_table) +my_fiml = SemFIML(my_observed, my_implied) ``` # Interfaces @@ -24,19 +85,14 @@ Analytic gradients are available. ## Implementation Subtype of `SemLossFunction`. 
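Editorial aside: written out, the per-pattern objective that `SemFIMLPattern` evaluates is `Nᵢ * (logdet(Σᵢ) + (x̄ᵢ - μᵢ)'Σᵢ⁻¹(x̄ᵢ - μᵢ) + tr(SᵢΣᵢ⁻¹))`, with the trace term dropped for single-observation patterns. A naive reference version (hypothetical helper, not part of the patch):

```julia
using LinearAlgebra

# Σᵢ, μᵢ: implied covariance/mean restricted to the pattern's observed variables
# Sᵢ, x̄ᵢ: within-pattern sample covariance and mean; Nᵢ: pattern size
function fiml_pattern_objective(Σᵢ, μᵢ, Sᵢ, x̄ᵢ, Nᵢ)
    Σ⁻¹ = inv(Symmetric(Σᵢ))
    μ_diff = x̄ᵢ - μᵢ
    F = logdet(Symmetric(Σᵢ)) + dot(μ_diff, Σ⁻¹, μ_diff)
    Nᵢ > 1 && (F += dot(Sᵢ, Σ⁻¹))  # tr(SᵢΣᵢ⁻¹) as a Frobenius inner product
    return Nᵢ * F
end
```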
""" -mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction +struct SemFIML{O, I, T, W} <: SemLoss{O, I} hessianeval::ExactHessian - inverses::INV #preallocated inverses of imp_cov - choleskys::C #preallocated choleskys - logdets::L #logdets of implied covmats - - ∇ind::O - imp_mean::IM - meandiff::M - imp_inv::I + observed::O + implied::I + patterns::Vector{SemFIMLPattern{T}} - mult::T + imp_inv::Matrix{T} # implied inverse commutator::CommutationMatrix @@ -47,39 +103,14 @@ end ### Constructors ############################################################################################ -function SemFIML(; observed::SemObservedMissing, specification, kwargs...) - inverses = - [zeros(nmeasured_vars(pat), nmeasured_vars(pat)) for pat in observed.patterns] - choleskys = Array{Cholesky{Float64, Array{Float64, 2}}, 1}(undef, length(inverses)) - - n_patterns = length(observed.patterns) - logdets = zeros(n_patterns) - - imp_mean = [zeros(nmeasured_vars(pat)) for pat in observed.patterns] - meandiff = [zeros(nmeasured_vars(pat)) for pat in observed.patterns] - - nobs_vars = nobserved_vars(observed) - imp_inv = zeros(nobs_vars, nobs_vars) - mult = similar.(inverses) - - # generate linear indicies of co-observed variable pairs for each pattern - Σ_linind = LinearIndices((nobs_vars, nobs_vars)) - ∇ind = map(observed.patterns) do pat - pat_vars = findall(pat.measured_mask) - vec(Σ_linind[pat_vars, pat_vars]) - end - +function SemFIML(observed::SemObservedMissing, implied::SemImplied; kwargs...) return SemFIML( ExactHessian(), - inverses, - choleskys, - logdets, - ∇ind, - imp_mean, - meandiff, - imp_inv, - mult, - CommutationMatrix(nvars(specification)), + observed, + implied, + [SemFIMLPattern(pat) for pat in observed.patterns], + zeros(nobserved_vars(observed), nobserved_vars(observed)), + CommutationMatrix(nvars(implied)), nothing, ) end @@ -88,30 +119,28 @@ end ### methods ############################################################################################ -function evaluate!( - objective, - gradient, - hessian, - semfiml::SemFIML, - implied::SemImplied, - model::AbstractSemSingle, - params, -) +function evaluate!(objective, gradient, hessian, fiml::SemFIML, params) isnothing(hessian) || error("Hessian not implemented for FIML") - if !check_fiml(semfiml, model) + implied = SEM.implied(fiml) + observed = SEM.observed(fiml) + + copyto!(fiml.imp_inv, implied.Σ) + Σ_chol = cholesky!(Symmetric(fiml.imp_inv); check = false) + + if !isposdef(Σ_chol) isnothing(objective) || (objective = non_posdef_return(params)) isnothing(gradient) || fill!(gradient, 1) return objective end - prepare_SemFIML!(semfiml, model) + @inbounds for (pat_fiml, pat) in zip(fiml.patterns, observed.patterns) + prepare!(pat_fiml, pat, implied) + end - scale = inv(nsamples(observed(model))) - isnothing(objective) || - (objective = scale * F_FIML(observed(model), semfiml, model, params)) - isnothing(gradient) || - (∇F_FIML!(gradient, observed(model), semfiml, model); gradient .*= scale) + scale = inv(nsamples(observed)) + isnothing(objective) || (objective = scale * F_FIML(eltype(params), fiml)) + isnothing(gradient) || (∇F_FIML!(gradient, fiml); gradient .*= scale) return objective end @@ -127,112 +156,45 @@ update_observed(lossfun::SemFIML, observed::SemObserved; kwargs...) 
= ### additional functions ############################################################################################ -function F_one_pattern(meandiff, inverse, obs_cov, logdet, N) - F = logdet + dot(meandiff, inverse, meandiff) - if N > one(N) - F += dot(obs_cov, inverse) - end - return F * N +function ∇F_fiml_outer!(G, JΣ, Jμ, fiml::SemFIML{O, I}) where {O, I <: SemImpliedSymbolic} + mul!(G, fiml.implied.∇Σ', JΣ) # should be transposed + mul!(G, fiml.implied.∇μ', Jμ, -1, 1) end -function ∇F_one_pattern(μ_diff, Σ⁻¹, S, obs_mask, ∇ind, N, Jμ, JΣ, model) - diff⨉inv = μ_diff' * Σ⁻¹ - - if N > one(N) - JΣ[∇ind] .+= N * vec(Σ⁻¹ * (I - S * Σ⁻¹ - μ_diff * diff⨉inv)) - @. Jμ[obs_mask] += (N * 2 * diff⨉inv)' +function ∇F_fiml_outer!(G, JΣ, Jμ, fiml::SemFIML) + implied = fiml.implied - else - JΣ[∇ind] .+= vec(Σ⁻¹ * (I - μ_diff * diff⨉inv)) - @. Jμ[obs_mask] += (2 * diff⨉inv)' - end -end + I_A⁻¹ = parent(implied.I_A⁻¹) + F⨉I_A⁻¹ = parent(implied.F * I_A⁻¹) + S = parent(implied.S) -function ∇F_fiml_outer!(G, JΣ, Jμ, implied::SemImpliedSymbolic, model, semfiml) - mul!(G, implied.∇Σ', JΣ) # should be transposed - mul!(G, implied.∇μ', Jμ, -1, 1) -end - -function ∇F_fiml_outer!(G, JΣ, Jμ, implied, model, semfiml) Iₙ = sparse(1.0I, size(implied.A)...) - P = kron(implied.F⨉I_A⁻¹, implied.F⨉I_A⁻¹) - Q = kron(implied.S * implied.I_A⁻¹', Iₙ) - Q .+= semfiml.commutator * Q + P = kron(F⨉I_A⁻¹, F⨉I_A⁻¹) + Q = kron(S * I_A⁻¹', Iₙ) + Q .+= fiml.commutator * Q ∇Σ = P * (implied.∇S + Q * implied.∇A) - ∇μ = - implied.F⨉I_A⁻¹ * implied.∇M + - kron((implied.I_A⁻¹ * implied.M)', implied.F⨉I_A⁻¹) * implied.∇A + ∇μ = F⨉I_A⁻¹ * implied.∇M + kron((I_A⁻¹ * implied.M)', F⨉I_A⁻¹) * implied.∇A mul!(G, ∇Σ', JΣ) # actually transposed mul!(G, ∇μ', Jμ, -1, 1) end -function F_FIML(observed::SemObservedMissing, semfiml, model, params) - F = zero(eltype(params)) - for (i, pat) in enumerate(observed.patterns) - F += F_one_pattern( - semfiml.meandiff[i], - semfiml.inverses[i], - pat.measured_cov, - semfiml.logdets[i], - nsamples(pat), - ) +function F_FIML(::Type{T}, fiml::SemFIML) where {T} + F = zero(T) + for (pat_fiml, pat) in zip(fiml.patterns, fiml.observed.patterns) + F += objective(pat_fiml, pat) end return F end -function ∇F_FIML!(G, observed::SemObservedMissing, semfiml, model) - Jμ = zeros(nobserved_vars(model)) - JΣ = zeros(nobserved_vars(model)^2) - - for (i, pat) in enumerate(observed.patterns) - ∇F_one_pattern( - semfiml.meandiff[i], - semfiml.inverses[i], - pat.measured_cov, - pat.measured_mask, - semfiml.∇ind[i], - nsamples(pat), - Jμ, - JΣ, - model, - ) - end - return ∇F_fiml_outer!(G, JΣ, Jμ, implied(model), model, semfiml) -end - -function prepare_SemFIML!(semfiml, model) - copy_per_pattern!(semfiml, model) - batch_cholesky!(semfiml, model) - #batch_sym_inv_update!(semfiml, model) - batch_inv!(semfiml, model) - for (i, pat) in enumerate(observed(model).patterns) - semfiml.meandiff[i] .= pat.measured_mean .- semfiml.imp_mean[i] - end -end - -function copy_per_pattern!(fiml::SemFIML, model::AbstractSem) - Σ = implied(model).Σ - μ = implied(model).μ - data = observed(model) - @inbounds @views for (i, pat) in enumerate(data.patterns) - fiml.inverses[i] .= Σ[pat.measured_mask, pat.measured_mask] - fiml.imp_mean[i] .= μ[pat.measured_mask] - end -end +function ∇F_FIML!(G, fiml::SemFIML) + Jμ = zeros(nobserved_vars(fiml)) + JΣ = zeros(nobserved_vars(fiml)^2) -function batch_cholesky!(semfiml, model) - for i in 1:size(semfiml.inverses, 1) - semfiml.choleskys[i] = cholesky!(Symmetric(semfiml.inverses[i])) - semfiml.logdets[i] 
= logdet(semfiml.choleskys[i]) + for (pat_fiml, pat) in zip(fiml.patterns, fiml.observed.patterns) + gradient!(JΣ, Jμ, pat_fiml, pat) end - return true -end - -function check_fiml(semfiml, model) - copyto!(semfiml.imp_inv, implied(model).Σ) - a = cholesky!(Symmetric(semfiml.imp_inv); check = false) - return isposdef(a) + ∇F_fiml_outer!(G, JΣ, Jμ, fiml) end diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index d14af648c..6af417829 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -8,48 +8,63 @@ Maximum likelihood estimation. # Constructor - SemML(;observed, meanstructure = false, approximate_hessian = false, kwargs...) + SemML(observed, implied; approximate_hessian = false) # Arguments - `observed::SemObserved`: the observed part of the model -- `meanstructure::Bool`: does the model have a meanstructure? +- `implied::SemImplied`: [`SemImplied`](@ref) instance - `approximate_hessian::Bool`: if hessian-based optimization is used, should the hessian be swapped for an approximation # Examples ```julia -my_ml = SemML(observed = my_observed) +my_ml = SemML(my_observed, my_implied) ``` - -# Interfaces -Analytic gradients are available, and for models without a meanstructure, also analytic hessians. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. """ -struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction +struct SemML{O, I, HE <: HessianEval, M} <: SemLoss{O, I} hessianeval::HE - Σ⁻¹::INV - Σ⁻¹Σₒ::M - meandiff::M2 - SemML{HE}(args...) where {HE <: HessianEval} = - new{HE, map(typeof, args)...}(HE(), args...) + observed::O + implied::I + + # pre-allocated arrays to store intermediate results in evaluate!() + obsXobs_1::M + obsXobs_2::M + obsXobs_3::M + obsXvar_1::M + varXvar_1::M + varXvar_2::M + varXvar_3::M end ############################################################################################ ### Constructors ############################################################################################ -function SemML(; observed::SemObserved, approximate_hessian::Bool = false, kwargs...) - obsmean = obs_mean(observed) - obscov = obs_cov(observed) - meandiff = isnothing(obsmean) ? nothing : copy(obsmean) - - return SemML{approximate_hessian ? ApproxHessian : ExactHessian}( - similar(obscov), - similar(obscov), - meandiff, +function SemML( + observed::SemObserved, + implied::SemImplied; + approximate_hessian::Bool = false, + kwargs..., +) + # check integrity + check_observed_vars(observed, implied) + + he = approximate_hessian ? 
ApproxHessian() : ExactHessian() + obsXobs = parent(obs_cov(observed)) + nobs_vars = nobserved_vars(implied) + nvars = SEM.nvars(implied) + + return SemML{typeof(observed), typeof(implied), typeof(he), typeof(obsXobs)}( + he, + observed, + implied, + similar(obsXobs), + similar(obsXobs), + similar(obsXobs), + similar(obsXobs, (nobs_vars, nvars)), + similar(obsXobs, (nvars, nvars)), + similar(obsXobs, (nvars, nvars)), + similar(obsXobs, (nvars, nvars)), ) end @@ -64,22 +79,20 @@ function evaluate!( objective, gradient, hessian, - semml::SemML, - implied::SemImpliedSymbolic, - model::AbstractSemSingle, + ml::SemML{<:Any, <:SemImpliedSymbolic}, par, ) + implied = SEM.implied(ml) + if !isnothing(hessian) (MeanStruct(implied) === HasMeanStruct) && throw(DomainError(H, "hessian of ML + meanstructure is not available")) end Σ = implied.Σ - Σₒ = obs_cov(observed(model)) - Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ - Σ⁻¹ = semml.Σ⁻¹ + Σₒ = obs_cov(observed(ml)) - copyto!(Σ⁻¹, Σ) + Σ⁻¹ = copy!(ml.obsXobs_1, Σ) Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "∑⁻¹ is not positive definite" @@ -90,12 +103,12 @@ function evaluate!( end ld = logdet(Σ_chol) Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + Σ⁻¹Σₒ = mul!(ml.obsXobs_2, Σ⁻¹, Σₒ) isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ)) if MeanStruct(implied) === HasMeanStruct μ = implied.μ - μₒ = obs_mean(observed(model)) + μₒ = obs_mean(observed(ml)) μ₋ = μₒ - μ isnothing(objective) || (objective += dot(μ₋, Σ⁻¹, μ₋)) @@ -103,24 +116,26 @@ function evaluate!( ∇Σ = implied.∇Σ ∇μ = implied.∇μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - mul!(gradient, ∇Σ', vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)) + J = copyto!(ml.obsXobs_3, Σ⁻¹) + mul!(J, Σ⁻¹Σₒ, Σ⁻¹, -1, 1) + mul!(J, μ₋ᵀΣ⁻¹', μ₋ᵀΣ⁻¹, -1, 1) + mul!(gradient, ∇Σ', vec(J)) mul!(gradient, ∇μ', μ₋ᵀΣ⁻¹', -2, 1) end elseif !isnothing(gradient) || !isnothing(hessian) ∇Σ = implied.∇Σ - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + Σ⁻¹ΣₒΣ⁻¹ = mul!(ml.obsXobs_3, Σ⁻¹Σₒ, Σ⁻¹) J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' if !isnothing(gradient) mul!(gradient, ∇Σ', J') end if !isnothing(hessian) - if HessianEval(semml) === ApproxHessian + if HessianEval(ml) === ApproxHessian mul!(hessian, ∇Σ' * kron(Σ⁻¹, Σ⁻¹), ∇Σ, 2, 0) else - ∇²Σ_function! 
= implied.∇²Σ_function ∇²Σ = implied.∇²Σ # inner - ∇²Σ_function!(∇²Σ, J, par) + implied.∇²Σ_eval!(∇²Σ, J, par) # outer H_outer = kron(2Σ⁻¹ΣₒΣ⁻¹ - Σ⁻¹, Σ⁻¹) mul!(hessian, ∇Σ' * H_outer, ∇Σ) @@ -134,25 +149,17 @@ end ############################################################################################ ### Non-Symbolic Implied Types -function evaluate!( - objective, - gradient, - hessian, - semml::SemML, - implied::RAM, - model::AbstractSemSingle, - par, -) +function evaluate!(objective, gradient, hessian, ml::SemML, par) if !isnothing(hessian) error("hessian of ML + non-symbolic implied type is not available") end + implied = SEM.implied(ml) + Σ = implied.Σ - Σₒ = obs_cov(observed(model)) - Σ⁻¹Σₒ = semml.Σ⁻¹Σₒ - Σ⁻¹ = semml.Σ⁻¹ + Σₒ = obs_cov(observed(ml)) - copyto!(Σ⁻¹, Σ) + Σ⁻¹ = copy!(ml.obsXobs_1, Σ) Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "Σ⁻¹ is not positive definite" @@ -163,41 +170,56 @@ function evaluate!( end ld = logdet(Σ_chol) Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + Σ⁻¹Σₒ = mul!(ml.obsXobs_2, Σ⁻¹, Σₒ) if !isnothing(objective) objective = ld + tr(Σ⁻¹Σₒ) if MeanStruct(implied) === HasMeanStruct μ = implied.μ - μₒ = obs_mean(observed(model)) + μₒ = obs_mean(observed(ml)) μ₋ = μₒ - μ objective += dot(μ₋, Σ⁻¹, μ₋) end end if !isnothing(gradient) - S = implied.S - F⨉I_A⁻¹ = implied.F⨉I_A⁻¹ - I_A⁻¹ = implied.I_A⁻¹ + S = parent(implied.S) + F⨉I_A⁻¹ = parent(implied.F⨉I_A⁻¹) + I_A⁻¹ = parent(implied.I_A⁻¹) ∇A = implied.∇A ∇S = implied.∇S - C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ - mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0) + # reuse Σ⁻¹Σₒ to calculate I-Σ⁻¹Σₒ + one_Σ⁻¹Σₒ = Σ⁻¹Σₒ + lmul!(-1, one_Σ⁻¹Σₒ) + one_Σ⁻¹Σₒ[diagind(one_Σ⁻¹Σₒ)] .+= 1 + + C = mul!( + ml.varXvar_1, + F⨉I_A⁻¹', + mul!(ml.obsXvar_1, mul!(ml.obsXobs_3, one_Σ⁻¹Σₒ, Σ⁻¹), F⨉I_A⁻¹), + ) + mul!( + gradient, + ∇A', + vec(mul!(ml.varXvar_3, C, mul!(ml.varXvar_2, S, I_A⁻¹'))), + 2, + 0, + ) mul!(gradient, ∇S', vec(C), 1, 1) if MeanStruct(implied) === HasMeanStruct μ = implied.μ - μₒ = obs_mean(observed(model)) + μₒ = obs_mean(observed(ml)) ∇M = implied.∇M M = implied.M μ₋ = μₒ - μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ mul!(gradient, ∇M', k', -2, 1) - mul!(gradient, ∇A', vec(k' * (I_A⁻¹ * (M + S * k'))'), -2, 1) - mul!(gradient, ∇S', vec(k'k), -1, 1) + mul!(gradient, ∇A', vec(mul!(ml.varXvar_1, k', (I_A⁻¹ * (M + S * k'))')), -2, 1) + mul!(gradient, ∇S', vec(mul!(ml.varXvar_2, k', k)), -1, 1) end end @@ -223,10 +245,15 @@ end update_observed(lossfun::SemML, observed::SemObservedMissing; kwargs...) = error("ML estimation does not work with missing data - use FIML instead") -function update_observed(lossfun::SemML, observed::SemObserved; kwargs...) - if size(lossfun.Σ⁻¹) == size(obs_cov(observed)) - return lossfun +function update_observed( + lossfun::SemML, + observed::SemObserved; + implied::SemImplied, + kwargs..., +) + if (obs_cov(lossfun) == obs_cov(observed)) && (obs_mean(lossfun) == obs_mean(observed)) + return lossfun # no change else - return SemML(; observed = observed, kwargs...) + return SemML(observed, implied; kwargs...) end end diff --git a/src/loss/ML/abstract.jl b/src/loss/ML/abstract.jl new file mode 100644 index 000000000..bf8585d6a --- /dev/null +++ b/src/loss/ML/abstract.jl @@ -0,0 +1,42 @@ +""" + observed(loss::SemLoss) -> SemObserved + +Returns the [*observed*](@ref SemObserved) part of a model. 
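+
+# Examples
+A minimal sketch (assuming `my_observed` and `my_implied` have already been constructed):
+```julia
+my_ml = SemML(my_observed, my_implied)
+observed(my_ml) === my_observed # true
+```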
+""" +observed(loss::SemLoss) = loss.observed + +""" + implied(loss::SemLoss) -> SemImplied + +Returns the [*implied*](@ref SemImplied) part of a model. +""" +implied(loss::SemLoss) = loss.implied + +for f in (:nsamples, :obs_cov, :obs_mean) + @eval $f(loss::SemLoss) = $f(observed(loss)) +end + +for f in ( + :vars, + :nvars, + :latent_vars, + :nlatent_vars, + :observed_vars, + :nobserved_vars, + :params, + :nparams, +) + @eval $f(loss::SemLoss) = $f(implied(loss)) +end + +function check_observed_vars(observed::SemObserved, implied::SemImplied) + isnothing(observed_vars(implied)) || + observed_vars(observed) == observed_vars(implied) || + throw( + ArgumentError( + "Observed variables defined for \"observed\" and \"implied\" do not match.", + ), + ) +end + +check_observed_vars(sem::SemLoss) = check_observed_vars(observed(sem), implied(sem)) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 0fe2c9b3c..0ebe12768 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -8,9 +8,8 @@ Weighted least squares estimation. # Constructor - SemWLS(; - observed, - meanstructure = false, + SemWLS( + observed::SemObserved, implied::SemImplied; wls_weight_matrix = nothing, wls_weight_matrix_mean = nothing, approximate_hessian = false, @@ -18,7 +17,7 @@ Weighted least squares estimation. # Arguments - `observed`: the `SemObserved` part of the model -- `meanstructure::Bool`: does the model have a meanstructure? +- `implied`: the `SemImplied` part of the model - `approximate_hessian::Bool`: should the hessian be swapped for an approximation - `wls_weight_matrix`: the weight matrix for weighted least squares. Defaults to GLS estimation (``0.5*(D^T*kron(S,S)*D)`` where D is the duplication matrix @@ -28,7 +27,7 @@ Weighted least squares estimation. # Examples ```julia -my_wls = SemWLS(observed = my_observed) +my_wls = SemWLS(my_observed, my_implied) ``` # Interfaces @@ -38,31 +37,48 @@ Analytic gradients are available, and for models without a meanstructure, also a ## Implementation Subtype of `SemLossFunction`. """ -struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction +struct SemWLS{O, I, HE <: HessianEval, Vt, St, C} <: SemLoss{O, I} + observed::O + implied::I + hessianeval::HE V::Vt σₒ::St V_μ::C + + SemWLS(observed, implied, ::Type{HE}, args...) where {HE <: HessianEval} = + new{typeof(observed), typeof(implied), HE, map(typeof, args)...}( + observed, + implied, + HE(), + args..., + ) end ############################################################################################ ### Constructors ############################################################################################ -SemWLS{HE}(args...) where {HE <: HessianEval} = - SemWLS{HE, map(typeof, args)...}(HE(), args...) 
-
-function SemWLS(;
-    observed,
-    wls_weight_matrix = nothing,
-    wls_weight_matrix_mean = nothing,
-    approximate_hessian = false,
-    meanstructure = false,
+function SemWLS(
+    observed::SemObserved,
+    implied::SemImplied;
+    wls_weight_matrix::Union{AbstractMatrix, Nothing} = nothing,
+    wls_weight_matrix_mean::Union{AbstractMatrix, Nothing} = nothing,
+    approximate_hessian::Bool = false,
     kwargs...,
 )
+    # check integrity
+    check_observed_vars(observed, implied)
+
     nobs_vars = nobserved_vars(observed)
-    tril_ind = filter(x -> (x[1] >= x[2]), CartesianIndices(obs_cov(observed)))
-    s = obs_cov(observed)[tril_ind]
+    s = vech(obs_cov(observed))
+    size(s) == size(implied.Σ) || throw(
+        DimensionMismatch(
+            "SemWLS requires implied covariance to be in vech-ed form " *
+            "(vectorized lower triangular part of Σ matrix): $(size(s)) expected, $(size(implied.Σ)) found.\n" *
+            "$(nameof(typeof(implied))) must be constructed with vech=true.",
+        ),
+    )

     # compute V here
     if isnothing(wls_weight_matrix)
@@ -76,15 +92,17 @@ function SemWLS(;
             "wls_weight_matrix has to be of size $(length(tril_ind))×$(length(tril_ind))",
         )
     end
+    size(wls_weight_matrix) == (length(s), length(s)) || throw(
+        DimensionMismatch("wls_weight_matrix has to be of size $(length(s))×$(length(s))"),
+    )

-    if meanstructure
+    if MeanStruct(implied) == HasMeanStruct
         if isnothing(wls_weight_matrix_mean)
+            @info "Computing WLS weight matrix for the meanstructure using obs_cov()"
             wls_weight_matrix_mean = inv(obs_cov(observed))
-        else
-            size(wls_weight_matrix_mean) == (nobs_vars, nobs_vars) || DimensionMismatch(
-                "wls_weight_matrix_mean has to be of size $(nobs_vars)×$(nobs_vars)",
-            )
         end
+        size(wls_weight_matrix_mean) == (nobs_vars, nobs_vars) || throw(
+            DimensionMismatch(
+                "wls_weight_matrix_mean has to be of size $(nobs_vars)×$(nobs_vars)",
+            ),
+        )
     else
         isnothing(wls_weight_matrix_mean) ||
             @warn "Ignoring wls_weight_matrix_mean since meanstructure is disabled"
@@ -92,31 +110,25 @@ function SemWLS(;
     end

     HE = approximate_hessian ? ApproxHessian : ExactHessian
-    return SemWLS{HE}(wls_weight_matrix, s, wls_weight_matrix_mean)
+    return SemWLS(observed, implied, HE, wls_weight_matrix, s, wls_weight_matrix_mean)
 end

 ############################################################################
 ### methods
 ############################################################################

-function evaluate!(
-    objective,
-    gradient,
-    hessian,
-    semwls::SemWLS,
-    implied::SemImpliedSymbolic,
-    model::AbstractSemSingle,
-    par,
-)
+function evaluate!(objective, gradient, hessian, wls::SemWLS, par)
+    implied = SEM.implied(wls)
+
     if !isnothing(hessian) && (MeanStruct(implied) === HasMeanStruct)
         error("hessian of WLS with meanstructure is not available")
     end

-    V = semwls.V
+    V = wls.V
     ∇σ = implied.∇Σ

     σ = implied.Σ
-    σₒ = semwls.σₒ
+    σₒ = wls.σₒ
     σ₋ = σₒ - σ

     isnothing(objective) || (objective = dot(σ₋, V, σ₋))
@@ -129,18 +141,17 @@ function evaluate!(
         gradient .*= -2
     end
     isnothing(hessian) || (mul!(hessian, ∇σ' * V, ∇σ, 2, 0))
-    if !isnothing(hessian) && (HessianEval(semwls) === ExactHessian)
-        ∇²Σ_function! = implied.∇²Σ_function
+    if !isnothing(hessian) && (HessianEval(wls) === ExactHessian)
         ∇²Σ = implied.∇²Σ
-        J = -2 * (σ₋' * semwls.V)'
-        ∇²Σ_function!(∇²Σ, J, par)
+        J = -2 * (σ₋' * wls.V)'
+        implied.∇²Σ_eval!(∇²Σ, J, par)
         hessian .+= ∇²Σ
     end
     if MeanStruct(implied) === HasMeanStruct
         μ = implied.μ
-        μₒ = obs_mean(observed(model))
+        μₒ = obs_mean(observed(wls))
         μ₋ = μₒ - μ
-        V_μ = semwls.V_μ
+        V_μ = wls.V_μ
         if !isnothing(objective)
             objective += dot(μ₋, V_μ, μ₋)
         end
@@ -156,5 +167,15 @@ end
 ### Recommended methods
 ############################################################################################

-update_observed(lossfun::SemWLS, observed::SemObserved; kwargs...) =
-    SemWLS(; observed = observed, kwargs...)
+function update_observed(
+    lossfun::SemWLS,
+    observed::SemObserved;
+    implied::SemImplied,
+    kwargs...,
+)
+    if (obs_cov(lossfun) == obs_cov(observed)) && (obs_mean(lossfun) == obs_mean(observed))
+        return lossfun # no change
+    else
+        return SemWLS(observed, implied; kwargs...)
+    end
+end
diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl
index cb5157346..432c076d8 100644
--- a/src/loss/constant/constant.jl
+++ b/src/loss/constant/constant.jl
@@ -4,6 +4,8 @@
 ### Types
 ############################################################################################

 """
+    SemConstant{C <: Number} <: AbstractLoss
+
 Constant loss term. Can be used for comparability to other packages.

 # Constructor
@@ -15,37 +17,21 @@ Constant loss term. Can be used for comparability to other packages.

 # Examples
 ```julia
-    my_constant = SemConstant(constant_loss = 42.0)
+    my_constant = SemConstant(42.0)
 ```
-
-# Interfaces
-Analytic gradients and hessians are available.
-
-# Extended help
-## Implementation
-Subtype of `SemLossFunction`.
 """
-struct SemConstant{C} <: SemLossFunction
+struct SemConstant{C <: Number} <: AbstractLoss
     hessianeval::ExactHessian
     c::C
-end

-############################################################################################
-### Constructors
-############################################################################################
-
-function SemConstant(; constant_loss, kwargs...)
-    return SemConstant(ExactHessian(), constant_loss)
+    SemConstant(c::Number) = new{typeof(c)}(ExactHessian(), c)
 end

-############################################################################################
-### methods
-############################################################################################
+SemConstant(; constant_loss::Number, kwargs...) = SemConstant(constant_loss)

-objective(constant::SemConstant, model::AbstractSem, par) = constant.c
-gradient(constant::SemConstant, model::AbstractSem, par) = zero(par)
-hessian(constant::SemConstant, model::AbstractSem, par) =
-    zeros(eltype(par), length(par), length(par))
+objective(constant::SemConstant, par) = convert(eltype(par), constant.c)
+gradient(constant::SemConstant, par) = zero(par)
+hessian(constant::SemConstant, par) = zeros(eltype(par), length(par), length(par))

 ############################################################################################
 ### Recommended methods
diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl
index 02f637270..d9c4d5bf4 100644
--- a/src/loss/regularization/ridge.jl
+++ b/src/loss/regularization/ridge.jl
@@ -29,7 +29,7 @@ Analytic gradients and hessians are available.

 ## Implementation
 Subtype of `SemLossFunction`.
""" -struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction +struct SemRidge{P, W1, W2, GT, HT} <: AbstractLoss hessianeval::ExactHessian α::P which::W1 @@ -78,15 +78,14 @@ end ### methods ############################################################################################ -objective(ridge::SemRidge, model::AbstractSem, par) = - @views ridge.α * sum(abs2, par[ridge.which]) +objective(ridge::SemRidge, par) = @views ridge.α * sum(abs2, par[ridge.which]) -function gradient(ridge::SemRidge, model::AbstractSem, par) +function gradient(ridge::SemRidge, par) @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] return ridge.gradient end -function hessian(ridge::SemRidge, model::AbstractSem, par) +function hessian(ridge::SemRidge, par) @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α return ridge.hessian end diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl index 4aafe4235..b65827fd9 100644 --- a/src/objective_gradient_hessian.jl +++ b/src/objective_gradient_hessian.jl @@ -23,44 +23,30 @@ is_hessian_required(::EvaluationTargets{<:Any, <:Any, H}) where {H} = H (targets::EvaluationTargets)(arg_tuple::Tuple) = targets(arg_tuple...) -# dispatch on SemImplied -evaluate!(objective, gradient, hessian, loss::SemLossFunction, model::AbstractSem, params) = - evaluate!(objective, gradient, hessian, loss, implied(model), model, params) - # fallback method -function evaluate!( - obj, - grad, - hess, - loss::SemLossFunction, - implied::SemImplied, - model, - params, -) - isnothing(obj) || (obj = objective(loss, implied, model, params)) - isnothing(grad) || copyto!(grad, gradient(loss, implied, model, params)) - isnothing(hess) || copyto!(hess, hessian(loss, implied, model, params)) +function evaluate!(obj, grad, hess, loss::AbstractLoss, params) + isnothing(obj) || (obj = objective(loss, params)) + isnothing(grad) || copyto!(grad, gradient(loss, params)) + isnothing(hess) || copyto!(hess, hessian(loss, params)) return obj end -# fallback methods -objective(f::SemLossFunction, implied::SemImplied, model, params) = - objective(f, model, params) -gradient(f::SemLossFunction, implied::SemImplied, model, params) = - gradient(f, model, params) -hessian(f::SemLossFunction, implied::SemImplied, model, params) = hessian(f, model, params) +evaluate!(obj, grad, hess, term::LossTerm, params) = + evaluate!(obj, grad, hess, loss(term), params) # fallback method for SemImplied that calls update_xxx!() methods -function update!(targets::EvaluationTargets, implied::SemImplied, model, params) - is_objective_required(targets) && update_objective!(implied, model, params) - is_gradient_required(targets) && update_gradient!(implied, model, params) - is_hessian_required(targets) && update_hessian!(implied, model, params) +function update!(targets::EvaluationTargets, implied::SemImplied, params) + is_objective_required(targets) && update_objective!(implied, params) + is_gradient_required(targets) && update_gradient!(implied, params) + is_hessian_required(targets) && update_hessian!(implied, params) end +const AbstractSemOrLoss = Union{AbstractSem, AbstractLoss} + # guess objective type -objective_type(model::AbstractSem, params::Any) = Float64 -objective_type(model::AbstractSem, params::AbstractVector{T}) where {T <: Number} = T -objective_zero(model::AbstractSem, params::Any) = zero(objective_type(model, params)) +objective_type(model::AbstractSemOrLoss, params::Any) = Float64 +objective_type(model::AbstractSemOrLoss, params::AbstractVector{T}) where {T <: Number} = T 
+objective_zero(model::AbstractSemOrLoss, params::Any) = zero(objective_type(model, params)) objective_type(objective::T, gradient, hessian) where {T <: Number} = T objective_type( @@ -76,94 +62,102 @@ objective_type( objective_zero(objective, gradient, hessian) = zero(objective_type(objective, gradient, hessian)) +evaluate!(objective, gradient, hessian, model::AbstractSem, params) = + error("evaluate!() for $(typeof(model)) is not implemented") + ############################################################################################ -# methods for AbstractSem +# methods for Sem ############################################################################################ -function evaluate!(objective, gradient, hessian, model::AbstractSemSingle, params) - targets = EvaluationTargets(objective, gradient, hessian) - # update implied state, its gradient and hessian (if required) - update!(targets, implied(model), model, params) - return evaluate!( - !isnothing(objective) ? zero(objective) : nothing, - gradient, - hessian, - loss(model), - model, - params, - ) -end +function evaluate!(objective, gradient, hessian, model::Sem, params) + # reset output + isnothing(objective) || (objective = objective_zero(objective, gradient, hessian)) + isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) + isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) -############################################################################################ -# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective) -############################################################################################ + # gradient and hessian for individual terms + t_grad = isnothing(gradient) ? nothing : similar(gradient) + t_hess = isnothing(hessian) ? nothing : similar(hessian) + + # update implied states of all SemLoss terms before term calculation loop + # to make sure all terms use updated implied states + targets = EvaluationTargets(objective, gradient, hessian) + for term in loss_terms(model) + issemloss(term) && update!(targets, implied(term), params) + end -function evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params) - function obj(p) - # recalculate implied state for p - update!(EvaluationTargets{true, false, false}(), implied(model), model, p) - evaluate!( - objective_zero(objective, gradient, hessian), - nothing, - nothing, - loss(model), - model, - p, + for term in loss_terms(model) + t_obj = evaluate!(objective, t_grad, t_hess, term, params) + #@show nameof(typeof(term)) t_obj + objective = accumulate_loss!( + objective, + gradient, + hessian, + weight(term), + t_obj, + t_grad, + t_hess, ) end - isnothing(gradient) || FiniteDiff.finite_difference_gradient!(gradient, obj, params) - isnothing(hessian) || FiniteDiff.finite_difference_hessian!(hessian, obj, params) - return !isnothing(objective) ? 
obj(params) : nothing + return objective end -objective(model::AbstractSem, params) = - evaluate!(objective_zero(model, params), nothing, nothing, model, params) - -############################################################################################ -# methods for SemLoss (weighted sum of individual SemLossFunctions) -############################################################################################ +# internal function to accumulate loss objective, gradient and hessian +function accumulate_loss!( + total_objective, + total_gradient, + total_hessian, + weight::Nothing, + objective, + gradient, + hessian, +) + isnothing(total_gradient) || (total_gradient .+= gradient) + isnothing(total_hessian) || (total_hessian .+= hessian) + return isnothing(total_objective) ? total_objective : (total_objective + objective) +end -function evaluate!(objective, gradient, hessian, loss::SemLoss, model::AbstractSem, params) - isnothing(objective) || (objective = zero(objective)) - isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) - isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) - f_grad = isnothing(gradient) ? nothing : similar(gradient) - f_hess = isnothing(hessian) ? nothing : similar(hessian) - for (f, weight) in zip(loss.functions, loss.weights) - f_obj = evaluate!(objective, f_grad, f_hess, f, model, params) - isnothing(objective) || (objective += weight * f_obj) - isnothing(gradient) || (gradient .+= weight * f_grad) - isnothing(hessian) || (hessian .+= weight * f_hess) - end - return objective +function accumulate_loss!( + total_objective, + total_gradient, + total_hessian, + weight::Number, + objective, + gradient, + hessian, +) + isnothing(total_gradient) || axpy!(weight, gradient, total_gradient) + isnothing(total_hessian) || axpy!(weight, hessian, total_hessian) + return isnothing(total_objective) ? total_objective : + (total_objective + weight * objective) end ############################################################################################ -# methods for SemEnsemble (weighted sum of individual AbstractSemSingle models) +# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective) ############################################################################################ -function evaluate!(objective, gradient, hessian, ensemble::SemEnsemble, params) - isnothing(objective) || (objective = zero(objective)) - isnothing(gradient) || fill!(gradient, zero(eltype(gradient))) - isnothing(hessian) || fill!(hessian, zero(eltype(hessian))) - sem_grad = isnothing(gradient) ? nothing : similar(gradient) - sem_hess = isnothing(hessian) ? nothing : similar(hessian) - for (sem, weight) in zip(ensemble.sems, ensemble.weights) - sem_obj = evaluate!(objective, sem_grad, sem_hess, sem, params) - isnothing(objective) || (objective += weight * sem_obj) - isnothing(gradient) || (gradient .+= weight * sem_grad) - isnothing(hessian) || (hessian .+= weight * sem_hess) - end - return objective +# evaluate!() wrapper that does some housekeeping, if necessary +_evaluate!(args...) = evaluate!(args...) 
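+
+# Usage sketch (illustrative): the public entry points (objective!, gradient!, ...)
+# funnel through _evaluate!(), so a standalone SemLoss term gets its implied state
+# refreshed before evaluation, e.g.
+#   obj = objective(my_ml, params) # updates implied(my_ml), then computes the loss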
+ +# update implied state, its gradient and hessian +function _evaluate!(objective, gradient, hessian, loss::SemLoss, params) + # note that any other Sem loss terms that are dependent on implied + # should be enumerated after the SemLoss term + # otherwise they would be using outdated implied state + update!(EvaluationTargets(objective, gradient, hessian), implied(loss), params) + return evaluate!(objective, gradient, hessian, loss, params) end +objective(model::AbstractSemOrLoss, params) = + _evaluate!(objective_zero(model, params), nothing, nothing, model, params) + # throw an error by default if gradient! and hessian! are not implemented -#= gradient!(lossfun::SemLossFunction, par, model) = - throw(ArgumentError("gradient for $(typeof(lossfun).name.wrapper) is not available")) +#= gradient!(model::AbstractSemOrLoss, par, model) = + throw(ArgumentError("gradient for $(nameof(typeof(model))) is not available")) -hessian!(lossfun::SemLossFunction, par, model) = - throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available")) =# +hessian!(model::AbstractSemOrLoss, par, model) = + throw(ArgumentError("hessian for $(nameof(typeof(model))) is not available")) =# ############################################################################################ # Documentation @@ -212,16 +206,16 @@ To implement a new `AbstractSem` subtype, you can add a method for function hessian! end objective!(model::AbstractSem, params) = - evaluate!(objective_zero(model, params), nothing, nothing, model, params) + _evaluate!(objective_zero(model, params), nothing, nothing, model, params) gradient!(gradient, model::AbstractSem, params) = - evaluate!(nothing, gradient, nothing, model, params) + _evaluate!(nothing, gradient, nothing, model, params) hessian!(hessian, model::AbstractSem, params) = - evaluate!(nothing, nothing, hessian, model, params) + _evaluate!(nothing, nothing, hessian, model, params) objective_gradient!(gradient, model::AbstractSem, params) = - evaluate!(objective_zero(model, params), gradient, nothing, model, params) + _evaluate!(objective_zero(model, params), gradient, nothing, model, params) objective_hessian!(hessian, model::AbstractSem, params) = - evaluate!(objective_zero(model, params), nothing, hessian, model, params) + _evaluate!(objective_zero(model, params), nothing, hessian, model, params) gradient_hessian!(gradient, hessian, model::AbstractSem, params) = - evaluate!(nothing, gradient, hessian, model, params) + _evaluate!(nothing, gradient, hessian, model, params) objective_gradient_hessian!(gradient, hessian, model::AbstractSem, params) = - evaluate!(objective_zero(model, params), gradient, hessian, model, params) + _evaluate!(objective_zero(model, params), gradient, hessian, model, params) diff --git a/src/observed/EM.jl b/src/observed/EM.jl index beac45ca8..1e2f064fd 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -2,187 +2,269 @@ ### Expectation Maximization Algorithm ############################################################################################ -# An EM Algorithm for MVN-distributed Data with missing values -# Adapted from supplementary Material to the book Machine Learning: A Probabilistic Perspective -# Copyright (2010) Kevin Murphy and Matt Dunham -# found at https://github.com/probml/pmtk3/blob/master/toolbox/BasicModels/gauss/sub/gaussMissingFitEm.m -# and at https://github.com/probml/pmtk3/blob/master/toolbox/Algorithms/optimization/emAlgo.m - # what about random restarts? 
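+# Usage sketch (illustrative): em_mvn() now takes the vector of missing-data
+# patterns and returns the EM estimates instead of mutating `observed`, e.g.
+#   Σ_em, μ_em = em_mvn(observed.patterns; max_iter_em = 200, rtol_em = 1e-6)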
-# outer function ---------------------------------------------------------------------------
 """
-    em_mvn(;
-        observed::SemObservedMissing,
-        start_em = start_em_observed,
-        max_iter_em = 100,
-        rtol_em = 1e-4,
-        kwargs...)
-
-Estimates the covariance matrix and mean vector of the normal distribution via expectation maximization for `observed`.
-Overwrites the statistics stored in `observed`.
+    em_mvn(patterns::AbstractVector{SemObservedMissingPattern};
+        start_em = start_em_observed,
+        max_iter_em = 100,
+        rtol_em = 1e-4,
+        max_nsamples_em = nothing,
+        min_eigval = nothing,
+        verbose = false,
+        kwargs...)
+
+Estimates the covariance matrix and mean vector of the
+multivariate normal distribution (MVN)
+via expectation maximization (EM) from the missing-data `patterns`.
+
+Returns the tuple of the EM covariance matrix and the EM mean vector.
+
+Based on the EM algorithm for MVN-distributed data with missing values
+adapted from the supplementary material to the book *Machine Learning: A Probabilistic Perspective*,
+copyright (2010) Kevin Murphy and Matt Dunham: see
+[*gaussMissingFitEm.m*](https://github.com/probml/pmtk3/blob/master/toolbox/BasicModels/gauss/sub/gaussMissingFitEm.m) and
+[*emAlgo.m*](https://github.com/probml/pmtk3/blob/master/toolbox/Algorithms/optimization/emAlgo.m) scripts.
 """
 function em_mvn(
-    observed::SemObservedMissing;
+    patterns::AbstractVector{<:SemObservedMissingPattern};
     start_em = start_em_observed,
-    max_iter_em = 100,
-    rtol_em = 1e-4,
+    max_iter_em::Integer = 100,
+    rtol_em::Number = 1e-4,
+    max_nsamples_em::Union{Integer, Nothing} = nothing,
+    min_eigval::Union{Number, Nothing} = nothing,
+    verbose::Bool = false,
     kwargs...,
 )
-    nvars = nobserved_vars(observed)
-    nsamps = nsamples(observed)
-
-    # preallocate stuff?
-    𝔼x_pre = zeros(nvars)
-    𝔼xxᵀ_pre = zeros(nvars, nvars)
-
-    ### precompute for full cases
-    fullpat = observed.patterns[1]
-    if nmissed_vars(fullpat) == 0
-        for row in eachrow(fullpat.data)
-            𝔼x_pre += row
-            𝔼xxᵀ_pre += row * row'
+    nobs_vars = nobserved_vars(patterns[1])
+
+    # precompute for full cases
+    𝔼x_full = zeros(nobs_vars)
+    𝔼xxᵀ_full = zeros(nobs_vars, nobs_vars)
+    nsamples_full = 0
+    for pat in patterns
+        if nmissed_vars(pat) == 0
+            𝔼x_full .+= sum(pat.data, dims = 2)
+            mul!(𝔼xxᵀ_full, pat.data, pat.data', 1, 1)
+            nsamples_full += nsamples(pat)
         end
     end
-
-    # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, nsamps
-    # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, nsamps)
+    if nsamples_full == 0
+        @warn "No full cases in data"
+    end

     # initialize
-    em_model = start_em(observed; kwargs...)
-    em_model_prev = EmMVNModel(zeros(nvars, nvars), zeros(nvars), false)
-    iter = 1
-    done = false
-    𝔼x = zeros(nvars)
-    𝔼xxᵀ = zeros(nvars, nvars)
-
-    while !done
-        em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xxᵀ_pre)
-        em_mvn_Mstep!(em_model, nsamps, 𝔼x, 𝔼xxᵀ)
-
-        if iter > max_iter_em
-            done = true
-            @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable.
-            Maybe try passing different starting values via 'start_em = ...' "
-        elseif iter > 1
-            # done = isapprox(ll, ll_prev; rtol = rtol)
-            done =
-                isapprox(em_model_prev.μ, em_model.μ; rtol = rtol_em) &
-                isapprox(em_model_prev.Σ, em_model.Σ; rtol = rtol_em)
+    Σ₀, μ = start_em(patterns; kwargs...)
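+    # start_em returns the starting (Σ, μ) estimates; Σ is densified below
+    # so that em_step!() can update it in-place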
+    Σ = convert(Matrix, Σ₀)
+    @assert all(isfinite, Σ) && all(isfinite, μ)
+    Σ_prev, μ_prev = copy(Σ), copy(μ)
+
+    iter = 0
+    converged = false
+    Δμ_rel = NaN
+    ΔΣ_rel = NaN
+    while !converged && (iter < max_iter_em)
+        em_step!(
+            Σ,
+            μ,
+            Σ_prev,
+            μ_prev,
+            patterns,
+            𝔼xxᵀ_full,
+            𝔼x_full,
+            nsamples_full;
+            max_nsamples_em,
+            min_eigval,
+            verbose,
+        )
+
+        if iter > 0
+            Δμ = norm(μ - μ_prev)
+            ΔΣ = norm(Σ - Σ_prev)
+            Δμ_rel = Δμ / max(norm(μ_prev), norm(μ))
+            ΔΣ_rel = ΔΣ / max(norm(Σ_prev), norm(Σ))
+            #@info "Iteration #$iter: ΔΣ=$(ΔΣ) ΔΣ/Σ=$(ΔΣ_rel) Δμ=$(Δμ) Δμ/μ=$(Δμ_rel)"
+            # converged = isapprox(ll, ll_prev; rtol = rtol)
+            converged = ΔΣ_rel <= rtol_em && Δμ_rel <= rtol_em
         end
-
-        # print("$iter \n")
-        iter = iter + 1
-        em_model_prev.μ, em_model_prev.Σ = em_model.μ, em_model.Σ
+        if !converged
+            Σ, Σ_prev = Σ_prev, Σ
+            μ, μ_prev = μ_prev, μ
+        end
+        iter += 1
+        #@info "$iter\n"
     end

-    # update EM Mode in observed
-    observed.em_model.Σ .= em_model.Σ
-    observed.em_model.μ .= em_model.μ
-    observed.em_model.fitted = true
+    if !converged
+        @warn "EM Algorithm for MVN missing data did not converge in $iter iterations (ΔΣ/Σ=$(ΔΣ_rel) Δμ/μ=$(Δμ_rel)).\n" *
+              "Likelihood for FIML is not interpretable.\n" *
+              "Maybe try passing different starting values via 'start_em = ...' "
+    else
+        @info "EM for MVN missing data converged in $iter iterations"
+    end

-    return nothing
+    return Σ, μ
 end

-# E and M step -----------------------------------------------------------------------------
-
-function em_mvn_Estep!(𝔼x, 𝔼xxᵀ, em_model, observed, 𝔼x_pre, 𝔼xxᵀ_pre)
-    𝔼x .= 0.0
-    𝔼xxᵀ .= 0.0
-
-    𝔼xᵢ = copy(𝔼x)
-    𝔼xxᵀᵢ = copy(𝔼xxᵀ)
-
-    μ = em_model.μ
-    Σ = em_model.Σ
+# E and M steps -----------------------------------------------------------------------------
+
+function em_step!(
+    Σ::AbstractMatrix,
+    μ::AbstractVector,
+    Σ₀::AbstractMatrix,
+    μ₀::AbstractVector,
+    patterns::AbstractVector{<:SemObservedMissingPattern},
+    𝔼xxᵀ_full::AbstractMatrix,
+    𝔼x_full::AbstractVector,
+    nsamples_full::Integer;
+    max_nsamples_em::Union{Integer, Nothing} = nothing,
+    min_eigval::Union{Number, Nothing} = nothing,
+    verbose::Bool = false,
+)
+    # E step, update 𝔼x and 𝔼xxᵀ
+    copy!(μ, 𝔼x_full)
+    copy!(Σ, 𝔼xxᵀ_full)
+    nsamples_used = nsamples_full
+    mul!(Σ, μ₀, μ₀', -nsamples_used, 1)
+    axpy!(-nsamples_used, μ₀, μ)

     # Compute the expected sufficient statistics
-    for pat in observed.patterns
-        (nmissed_vars(pat) == 0) && continue # skip full cases
+    for pat in patterns
+        (nmissed_vars(pat) == 0) && continue # full cases already accounted for

         # observed and unobserved vars
         u = pat.miss_mask
         o = pat.measured_mask

-        # precompute for pattern
-        Σoo = Σ[o, o]
-        Σuo = Σ[u, o]
-        μu = μ[u]
-        μo = μ[o]
+        # compute cholesky to speed-up ldiv!()
+        Σ₀oo_chol = cholesky(Symmetric(Σ₀[o, o]))
+        Σ₀uo = Σ₀[u, o]
+        μ₀u = μ₀[u]
+        μ₀o = μ₀[o]
+
+        # get pattern observations
+        nsamples_pat =
+            !isnothing(max_nsamples_em) ? min(max_nsamples_em, nsamples(pat)) :
+            nsamples(pat)
+        zo =
+            nsamples_pat < nsamples(pat) ?
+            pat.data[:, sort!(sample(1:nsamples(pat), nsamples_pat, replace = false))] :
+            copy(pat.data)
+        zo .-= μ₀o # subtract current mean from observations
+
+        𝔼zo = sum(zo, dims = 2)
+        𝔼zu = fill!(similar(μ₀u), 0)
+
+        𝔼zzᵀuo = fill!(similar(Σ₀uo), 0)
+        𝔼zzᵀuu = nsamples_pat * Σ₀[u, u]
+        mul!(𝔼zzᵀuu, Σ₀uo, Σ₀oo_chol \ Σ₀uo', -nsamples_pat, 1)
+
+        # loop through observations
+        yᵢo = similar(μ₀o)
+        𝔼zᵢu = similar(μ₀u)
+        @inbounds for zᵢo in eachcol(zo)
+            ldiv!(yᵢo, Σ₀oo_chol, zᵢo)
+            mul!(𝔼zᵢu, Σ₀uo, yᵢo)
+            mul!(𝔼zzᵀuu, 𝔼zᵢu, 𝔼zᵢu', 1, 1)
+            mul!(𝔼zzᵀuo, 𝔼zᵢu, zᵢo', 1, 1)
+            𝔼zu .+= 𝔼zᵢu
+        end
+        # correct 𝔼zzᵀ by adding back μ₀×𝔼z' + 𝔼z'×μ₀
+        mul!(𝔼zzᵀuo, μ₀u, 𝔼zo', 1, 1)
+        mul!(𝔼zzᵀuo, 𝔼zu, μ₀o', 1, 1)

-        V = Σ[u, u] - Σuo * (Σoo \ Σ[o, u])
+        mul!(𝔼zzᵀuu, μ₀u, 𝔼zu', 1, 1)
+        mul!(𝔼zzᵀuu, 𝔼zu, μ₀u', 1, 1)

-        # loop through data
-        for rowdata in eachrow(pat.data)
-            m = μu + Σuo * (Σoo \ (rowdata - μo))
+        𝔼zzᵀoo = zo * zo'
+        mul!(𝔼zzᵀoo, μ₀o, 𝔼zo', 1, 1)
+        mul!(𝔼zzᵀoo, 𝔼zo, μ₀o', 1, 1)

-            𝔼xᵢ[u] = m
-            𝔼xᵢ[o] = rowdata
-            𝔼xxᵀᵢ[u, u] = 𝔼xᵢ[u] * 𝔼xᵢ[u]' + V
-            𝔼xxᵀᵢ[o, o] = 𝔼xᵢ[o] * 𝔼xᵢ[o]'
-            𝔼xxᵀᵢ[o, u] = 𝔼xᵢ[o] * 𝔼xᵢ[u]'
-            𝔼xxᵀᵢ[u, o] = 𝔼xᵢ[u] * 𝔼xᵢ[o]'
+        # update Σ and μ
+        Σ[o, o] .+= 𝔼zzᵀoo
+        Σ[u, o] .+= 𝔼zzᵀuo
+        Σ[o, u] .+= 𝔼zzᵀuo'
+        Σ[u, u] .+= 𝔼zzᵀuu

-            𝔼x .+= 𝔼xᵢ
-            𝔼xxᵀ .+= 𝔼xxᵀᵢ
-        end
-    end
+        μ[o] .+= 𝔼zo
+        μ[u] .+= 𝔼zu

-    𝔼x .+= 𝔼x_pre
-    𝔼xxᵀ .+= 𝔼xxᵀ_pre
-end
+        nsamples_used += nsamples_pat
+    end

-function em_mvn_Mstep!(em_model, nsamples, 𝔼x, 𝔼xxᵀ)
-    em_model.μ = 𝔼x / nsamples
-    Σ = Symmetric(𝔼xxᵀ / nsamples - em_model.μ * em_model.μ')
+    # M step, update em_model
+    lmul!(1 / nsamples_used, Σ)
+    lmul!(1 / nsamples_used, μ)
+    # at this point μ = μ - μ₀
+    # and Σ = Σ + (μ - μ₀)×(μ - μ₀)' - μ₀×μ₀'
+    mul!(Σ, μ, μ₀', -1, 1)
+    mul!(Σ, μ₀, μ', -1, 1)
+    mul!(Σ, μ, μ', -1, 1)
+    μ .+= μ₀
+
+    if !isnothing(min_eigval)
+        # make all eigvals no less than min_eigval
+        Σ_eigen = eigen(Σ)
+        Σ_eigmin = minimum(Σ_eigen.values)
+        if Σ_eigmin < min_eigval
+            clamped_Σ_eigvals = max.(Σ_eigen.values, min_eigval)
+            mul!(parent(Σ), Σ_eigen.vectors, Diagonal(clamped_Σ_eigvals) * Σ_eigen.vectors')
+            StatsBase._symmetrize!(parent(Σ))
+            if verbose
+                n_below_min = sum(<(min_eigval), Σ_eigen.values)
+                Δnorm = sqrt(sum(abs2, Σ_eigen.values - clamped_Σ_eigvals))
+                @info "eigmin(Σ) = $Σ_eigmin, N(eigvals < $min_eigval) = $n_below_min, Δ(Σ, clamped(Σ)) = $Δnorm"
+            end
+        else
+            verbose &&
+                @info "eigmin(Σ) = $Σ_eigmin, N(eigvals < $min_eigval) = 0, Δ(Σ, clamped(Σ)) = 0"
+        end
+    end

     # ridge Σ
     # while !isposdef(Σ)
     #     Σ += 0.5I
     # end

-    em_model.Σ = Σ
-
     # diagonalization
     #if !isposdef(Σ)
     #    print("Matrix not positive definite")
-    #    em_model.Σ .= 0
-    #    em_model.Σ[diagind(em_model.Σ)] .= diag(Σ)
+    #    Σ .= 0
+    #    Σ[diagind(Σ)] .= diag(Σ)
     #else
-    #    em_model.Σ = Σ
+    #    Σ = Σ
     #end

-    return nothing
+    return Σ, μ
 end

 # generate starting values -----------------------------------------------------------------

 # use μ and Σ of full cases
-function start_em_observed(observed::SemObservedMissing; kwargs...)
-    fullpat = observed.patterns[1]
-    if (nmissed_vars(fullpat) == 0) && (nobserved_vars(fullpat) > 1)
+function start_em_observed(patterns::AbstractVector{<:SemObservedMissingPattern}; kwargs...)
+    fullpat = patterns[1]
+    if (nmissed_vars(fullpat) == 0) && (nsamples(fullpat) > 1)
         μ = copy(fullpat.measured_mean)
-        Σ = copy(Symmetric(fullpat.measured_cov))
+        Σ = copy(parent(fullpat.measured_cov))
         if !isposdef(Σ)
-            Σ = Matrix(Diagonal(Σ))
+            Σ = Diagonal(Σ)
         end
-        return EmMVNModel(Σ, μ, false)
+        return Σ, μ
     else
-        return start_em_simple(observed, kwargs...)
+        return start_em_simple(patterns; kwargs...)
     end
 end

 # use μ = O and Σ = I
-function start_em_simple(observed::SemObservedMissing; kwargs...)
-    nvars = nobserved_vars(observed)
-    μ = zeros(nvars)
-    Σ = rand(nvars, nvars)
+function start_em_simple(patterns::AbstractVector{<:SemObservedMissingPattern}; kwargs...)
+    nobs_vars = nobserved_vars(first(patterns))
+    μ = zeros(nobs_vars)
+    Σ = rand(nobs_vars, nobs_vars)
     Σ = Σ * Σ'
-    # Σ = Matrix(1.0I, nvars, nvars)
-    return EmMVNModel(Σ, μ, false)
+    # Σ = Matrix(1.0I, nobs_vars, nobs_vars)
+    return Σ, μ
 end

 # set to passed values
-function start_em_set(observed::SemObservedMissing; model_em, kwargs...)
-    return em_model
+function start_em_set(
+    patterns::AbstractVector{<:SemObservedMissingPattern};
+    obs_cov::AbstractMatrix,
+    obs_mean::AbstractVector,
+    kwargs...,
+)
+    return copy(obs_cov), copy(obs_mean)
 end
diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl
index 221ef5ca3..531db16ef 100644
--- a/src/observed/covariance.jl
+++ b/src/observed/covariance.jl
@@ -3,7 +3,7 @@ Type alias for [`SemObservedData`](@ref) that has mean and covariance, but no ac

 For instances of `SemObservedCovariance` [`samples`](@ref) returns `nothing`.
 """
-const SemObservedCovariance{S} = SemObservedData{Nothing, S}
+const SemObservedCovariance{C, S} = SemObservedData{Nothing, C, S}

 """
     SemObservedCovariance(;
@@ -76,5 +76,5 @@ function SemObservedCovariance(;
         obs_mean = obs_mean[obs_vars_perm]
     end

-    return SemObservedData(nothing, obs_vars, obs_cov, obs_mean, nsamples)
+    return SemObservedData(nothing, obs_vars, Symmetric(obs_cov), obs_mean, nsamples)
 end
diff --git a/src/observed/data.jl b/src/observed/data.jl
index b6ddaa43d..b3a5b9cff 100644
--- a/src/observed/data.jl
+++ b/src/observed/data.jl
@@ -26,10 +26,10 @@ For observed data without missings.
 ## Implementation
 Subtype of `SemObserved`
 """
-struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, S <: Number} <: SemObserved
+struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, C, S <: Number} <: SemObserved
     data::D
     observed_vars::Vector{Symbol}
-    obs_cov::Matrix{S}
+    obs_cov::C
     obs_mean::Vector{S}
     nsamples::Int
 end
@@ -45,7 +45,7 @@ function SemObservedData(;
         prepare_data(data, observed_vars, specification; observed_var_prefix)

     obs_mean, obs_cov = mean_and_cov(data, 1)

-    return SemObservedData(data, obs_vars, obs_cov, vec(obs_mean), size(data, 1))
+    return SemObservedData(data, obs_vars, Symmetric(obs_cov), vec(obs_mean), size(data, 1))
 end

############################################################################################
diff --git a/src/observed/missing.jl b/src/observed/missing.jl
index cf699252e..1e9e8b7ca 100644
--- a/src/observed/missing.jl
+++ b/src/observed/missing.jl
@@ -35,18 +35,18 @@ For observed data with missing values.
- `nobserved_vars(::SemObservedMissing)` -> number of observed variables - `samples(::SemObservedMissing)` -> data matrix (contains both measured and missing values) -- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via expectation maximization ## Implementation Subtype of `SemObserved` """ -struct SemObservedMissing{T <: Real, S <: Real, E <: EmMVNModel} <: SemObserved +struct SemObservedMissing{T <: Real, S <: Real} <: SemObserved data::Matrix{Union{T, Missing}} observed_vars::Vector{Symbol} nsamples::Int patterns::Vector{SemObservedMissingPattern{T, S}} - em_model::E + obs_cov::Matrix{S} + obs_mean::Vector{S} end ############################################################################################ @@ -79,26 +79,17 @@ function SemObservedMissing(; ] sort!(patterns, by = nmissed_vars) - # allocate EM model (but don't fit) - em_model = EmMVNModel(zeros(nobs_vars, nobs_vars), zeros(nobs_vars), false) + em_cov, em_mean = em_mvn(patterns; kwargs...) return SemObservedMissing( convert(Matrix{Union{nonmissingtype(eltype(data)), Missing}}, data), obs_vars, nsamples, patterns, - em_model, + em_cov, + em_mean, ) end -############################################################################################ -### Recommended methods -############################################################################################ - -############################################################################################ -### Additional methods -############################################################################################ - -em_model(observed::SemObservedMissing) = observed.em_model -obs_mean(observed::SemObservedMissing) = obs_mean(em_model(observed)) -obs_cov(observed::SemObservedMissing) = obs_cov(em_model(observed)) +obs_cov(observed::SemObservedMissing) = observed.obs_cov +obs_mean(observed::SemObservedMissing) = observed.obs_mean diff --git a/src/observed/missing_pattern.jl b/src/observed/missing_pattern.jl index 6ac6a360b..8d9a1eb93 100644 --- a/src/observed/missing_pattern.jl +++ b/src/observed/missing_pattern.jl @@ -5,10 +5,10 @@ struct SemObservedMissingPattern{T, S} measured_mask::BitVector # measured vars mask miss_mask::BitVector # missing vars mask rows::Vector{Int} # rows in original data - data::Matrix{T} # non-missing submatrix of data + data::Matrix{T} # non-missing submatrix of data (vars × observations) measured_mean::Vector{S} # means of measured vars - measured_cov::Matrix{S} # covariance of measured vars + measured_cov::Symmetric{S, Matrix{S}} # covariance of measured vars end function SemObservedMissingPattern( @@ -32,9 +32,9 @@ function SemObservedMissingPattern( measured_mask, .!measured_mask, rows, - pat_data, + permutedims(pat_data), dropdims(pat_mean, dims = 1), - pat_cov, + Symmetric(pat_cov), ) end diff --git a/src/optimizer/abstract.jl b/src/optimizer/abstract.jl index 68bcc04ad..e435db28a 100644 --- a/src/optimizer/abstract.jl +++ b/src/optimizer/abstract.jl @@ -41,13 +41,13 @@ sem_fit(model::AbstractSem; engine::Symbol = :Optim, start_val = nothing, kwargs sem_fit(optim::SemOptimizer, model::AbstractSem, start_params; kwargs...) = error("Optimizer $(optim) support not implemented.") -# FABIN3 is the default method for single models -prepare_start_params(start_val::Nothing, model::AbstractSemSingle; kwargs...) = - start_fabin3(model; kwargs...) - -# simple algorithm is the default method for ensembles -prepare_start_params(start_val::Nothing, model::AbstractSem; kwargs...) 
= - start_simple(model; kwargs...) +# defaults when no starting parameters are specified +function prepare_start_params(start_val::Nothing, model::AbstractSem; kwargs...) + sems = sem_terms(model) + # FABIN3 for single models, simple algorithm for ensembles + return length(sems) == 1 ? start_fabin3(loss(sems[1]); kwargs...) : + start_simple(model; kwargs...) +end function prepare_start_params(start_val::AbstractVector, model::AbstractSem; kwargs...) (length(start_val) == nparams(model)) || throw( diff --git a/src/types.jl b/src/types.jl index e802e057a..341c748ce 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,14 +1,6 @@ ############################################################################################ # Define the basic type system ############################################################################################ -"Most abstract supertype for all SEMs" -abstract type AbstractSem end - -"Supertype for all single SEMs, e.g. SEMs that have at least the fields `observed`, `implied`, `loss`" -abstract type AbstractSemSingle{O, I, L} <: AbstractSem end - -"Supertype for all collections of multiple SEMs" -abstract type AbstractSemCollection <: AbstractSem end "Meanstructure trait for `SemImplied` subtypes" abstract type MeanStruct end @@ -36,48 +28,8 @@ HessianEval(::Type{T}) where {T} = HessianEval(semobj) = HessianEval(typeof(semobj)) -"Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `SemLossFunction`." -abstract type SemLossFunction end - -""" - SemLoss(args...; loss_weights = nothing, ...) - -Constructs the loss field of a SEM. Can contain multiple `SemLossFunction`s, the model is optimized over their sum. -See also [`SemLossFunction`](@ref). - -# Arguments -- `args...`: Multiple `SemLossFunction`s. -- `loss_weights::Vector`: Weights for each loss function. Defaults to unweighted optimization. - -# Examples -```julia -my_ml_loss = SemML(...) -my_ridge_loss = SemRidge(...) -my_loss = SemLoss(SemML, SemRidge; loss_weights = [1.0, 2.0]) -``` -""" -mutable struct SemLoss{F <: Tuple, T} - functions::F - weights::T -end - -function SemLoss(functions...; loss_weights = nothing, kwargs...) - if !isnothing(loss_weights) - loss_weights = SemWeight.(loss_weights) - else - loss_weights = Tuple(SemWeight(nothing) for _ in 1:length(functions)) - end - - return SemLoss(functions, loss_weights) -end - -# weights for loss functions or models. If the weight is nothing, multiplication returns the second argument -struct SemWeight{T} - w::T -end - -Base.:*(x::SemWeight{Nothing}, y) = y -Base.:*(x::SemWeight, y) = x.w * y +"Supertype for all loss functions of SEMs. If you want to implement a custom loss function, it should be a subtype of `AbstractLoss`." +abstract type AbstractLoss end """ Supertype of all objects that can serve as the `optimizer` field of a SEM. @@ -116,153 +68,88 @@ abstract type SemImplied end abstract type SemImpliedSymbolic <: SemImplied end """ - Sem(;observed = SemObservedData, implied = RAM, loss = SemML, kwargs...) +State of `SemImplied` that corresponds to the specific SEM parameter values. + +Contains the necessary vectors and matrices for calculating the SEM +objective, gradient and hessian (whichever is requested). +""" +abstract type SemImpliedState end -Constructor for the basic `Sem` type. -All additional kwargs are passed down to the constructors for the observed, implied, and loss fields. 
+implied(state::SemImpliedState) = state.implied +MeanStruct(state::SemImpliedState) = MeanStruct(implied(state)) +HessianEval(state::SemImpliedState) = HessianEval(implied(state)) -# Arguments -- `observed`: object of subtype `SemObserved` or a constructor. -- `implied`: object of subtype `SemImplied` or a constructor. -- `loss`: object of subtype `SemLossFunction`s or constructor; or a tuple of such. - -Returns a Sem with fields -- `observed::SemObserved`: Stores observed data, sample statistics, etc. See also [`SemObserved`](@ref). -- `implied::SemImplied`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImplied`](@ref). -- `loss::SemLoss`: Computes the objective and gradient of a sum of loss functions. See also [`SemLoss`](@ref). """ -mutable struct Sem{O <: SemObserved, I <: SemImplied, L <: SemLoss} <: - AbstractSemSingle{O, I, L} - observed::O - implied::I - loss::L -end + abstract type SemLoss{O <: SemObserved, I <: SemImplied} <: AbstractLoss end -############################################################################################ -# automatic differentiation -############################################################################################ +The base type for calculating the loss of the implied SEM model when explaining the observed data. + +All subtypes of `SemLoss` should have the following fields: +- `observed::O`: object of subtype [`SemObserved`](@ref). +- `implied::I`: object of subtype [`SemImplied`](@ref). """ - SemFiniteDiff(;observed = SemObservedData, implied = RAM, loss = SemML, kwargs...) +abstract type SemLoss{O <: SemObserved, I <: SemImplied} <: AbstractLoss end -A wrapper around [`Sem`](@ref) that substitutes dedicated evaluation of gradient and hessian with -finite difference approximation. +"Most abstract supertype for all SEMs" +abstract type AbstractSem end -# Arguments -- `observed`: object of subtype `SemObserved` or a constructor. -- `implied`: object of subtype `SemImplied` or a constructor. -- `loss`: object of subtype `SemLossFunction`s or constructor; or a tuple of such. - -Returns a Sem with fields -- `observed::SemObserved`: Stores observed data, sample statistics, etc. See also [`SemObserved`](@ref). -- `implied::SemImplied`: Computes model implied statistics, like Σ, μ, etc. See also [`SemImplied`](@ref). -- `loss::SemLoss`: Computes the objective and gradient of a sum of loss functions. See also [`SemLoss`](@ref). """ -struct SemFiniteDiff{O <: SemObserved, I <: SemImplied, L <: SemLoss} <: - AbstractSemSingle{O, I, L} - observed::O - implied::I + struct LossTerm{L, I, W} + +A term of a [`Sem`](@ref) model that wraps [`AbstractLoss`](@ref) loss function of type `L`. +Loss term can have an optional *id* of type `I` and *weight* of numeric type `W`. +""" +struct LossTerm{L <: AbstractLoss, I <: Union{Symbol, Nothing}, W <: Union{Number, Nothing}} loss::L + id::I + weight::W end -############################################################################################ -# ensemble models -############################################################################################ """ - (1) SemEnsemble(models..., weights = nothing, kwargs...) + Sem(loss_terms...; [params], kwargs...) - (2) SemEnsemble(;specification, data, groups, column = :group, kwargs...) - -Constructor for ensemble models. (2) can be used to conveniently specify multigroup models. 
+SEM model (including model ensembles) that combines all the data, implied SEM structure +and regularization terms and implements the calculation of their weighted sum, as well as its +gradient and (optionally) Hessian. # Arguments -- `models...`: `AbstractSem`s. -- `weights::Vector`: Weights for each model. Defaults to the number of observed data points. -- `specification::EnsembleParameterTable`: Model specification. -- `data::DataFrame`: Observed data. Must contain a `column` of type `Vector{Symbol}` that contains the group. -- `groups::Vector{Symbol}`: Group names. -- `column::Symbol`: Name of the column in `data` that contains the group. - -All additional kwargs are passed down to the model parts. - -Returns a SemEnsemble with fields -- `n::Int`: Number of models. -- `sems::Tuple`: `AbstractSem`s. -- `weights::Vector`: Weights for each model. -- `params::Vector`: Stores parameter labels and their position. +- `loss_terms...`: [`AbstractLoss`](@ref) objects, including SEM losses ([`SemLoss`](@ref)), + optionally can be a pair of a loss object and its numeric weight + +# Fields +- `loss_terms::Tuple`: a tuple of all loss functions and their weights +- `params::Vector{Symbol}`: the vector of parameter ids shared by all loss functions. """ -struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, I} <: AbstractSemCollection - n::N - sems::T - weights::V - params::I +struct Sem{L <: Tuple} <: AbstractSem + loss_terms::L + params::Vector{Symbol} end -# constructor from multiple models -function SemEnsemble(models...; weights = nothing, kwargs...) - n = length(models) - - # default weights +############################################################################################ +# automatic differentiation +############################################################################################ - if isnothing(weights) - nsamples_total = sum(nsamples, models) - weights = [nsamples(model) / nsamples_total for model in models] - end +""" + SemFiniteDiff(model::AbstractSem) - # check parameters equality - params = SEM.params(models[1]) - for model in models - if params != SEM.params(model) - throw(ErrorException("The parameters of your models do not match. \n - Maybe you tried to specify models of an ensemble via ParameterTables. \n - In that case, you may use RAMMatrices instead.")) - end - end +A wrapper around [`Sem`](@ref) that substitutes dedicated evaluation of gradient and hessian with +finite difference approximation. - return SemEnsemble(n, models, weights, params) +# Arguments +- `model::Sem`: the SEM model to wrap +""" +struct SemFiniteDiff{S <: AbstractSem} <: AbstractSem + model::S end -# constructor from EnsembleParameterTable and data set -function SemEnsemble(; specification, data, groups, column = :group, kwargs...) - if specification isa EnsembleParameterTable - specification = convert(Dict{Symbol, RAMMatrices}, specification) - end - models = [] - for group in groups - ram_matrices = specification[group] - data_group = select(filter(r -> r[column] == group, data), Not(column)) - if iszero(nrow(data_group)) - error("Your data does not contain any observations from group `$(group)`.") - end - model = Sem(; specification = ram_matrices, data = data_group, kwargs...) - push!(models, model) - end - return SemEnsemble(models...; weights = nothing, kwargs...) 
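+
+# Migration sketch (illustrative): multigroup models previously built via
+# SemEnsemble are now expressed as a single Sem with one loss term per group,
+# e.g. Sem(SemML(obs_g1, RAM(spec_g1)), SemML(obs_g2, RAM(spec_g2)))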
+struct LossFiniteDiff{L <: AbstractLoss} <: AbstractLoss + loss::L end -params(ensemble::SemEnsemble) = ensemble.params - -""" - n_models(ensemble::SemEnsemble) -> Integer - -Returns the number of models in an ensemble model. -""" -n_models(ensemble::SemEnsemble) = ensemble.n -""" - models(ensemble::SemEnsemble) -> Tuple{AbstractSem} - -Returns the models in an ensemble model. -""" -models(ensemble::SemEnsemble) = ensemble.sems -""" - weights(ensemble::SemEnsemble) -> Vector - -Returns the weights of an ensemble model. -""" -weights(ensemble::SemEnsemble) = ensemble.weights +struct SemLossFiniteDiff{O, I, L <: SemLoss{O, I}} <: SemLoss{O, I} + loss::L +end -""" -Base type for all SEM specifications. -""" abstract type SemSpecification end abstract type AbstractParameterTable <: SemSpecification end diff --git a/test/examples/helper.jl b/test/examples/helper.jl index f35d2cac6..96a195952 100644 --- a/test/examples/helper.jl +++ b/test/examples/helper.jl @@ -8,44 +8,38 @@ function test_gradient(model, params; rtol = 1e-10, atol = 0) @test nparams(model) == length(params) true_grad = FiniteDiff.finite_difference_gradient(Base.Fix1(objective!, model), params) - gradient = similar(params) - # F and G - fill!(gradient, NaN) - gradient!(gradient, model, params) - @test gradient ≈ true_grad rtol = rtol atol = atol + gradient_G = fill!(similar(params), NaN) + gradient!(gradient_G, model, params) + gradient_FG = fill!(similar(params), NaN) + objective_gradient!(gradient_FG, model, params) - # only G - fill!(gradient, NaN) - objective_gradient!(gradient, model, params) - @test gradient ≈ true_grad rtol = rtol atol = atol + @test gradient_G == gradient_FG + + #@info "G norm = $(norm(gradient_G - true_grad, Inf))" + @test gradient_G ≈ true_grad rtol = rtol atol = atol end function test_hessian(model, params; rtol = 1e-4, atol = 0) true_hessian = FiniteDiff.finite_difference_hessian(Base.Fix1(objective!, model), params) - hessian = similar(params, size(true_hessian)) - gradient = similar(params) - - # H - fill!(hessian, NaN) - hessian!(hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # F and H - fill!(hessian, NaN) - objective_hessian!(hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # G and H - fill!(hessian, NaN) - gradient_hessian!(gradient, hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # F, G and H - fill!(hessian, NaN) - objective_gradient_hessian!(gradient, hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol + gradient = fill!(similar(params), NaN) + + hessian_H = fill!(similar(parent(true_hessian)), NaN) + hessian!(hessian_H, model, params) + + hessian_FH = fill!(similar(hessian_H), NaN) + objective_hessian!(hessian_FH, model, params) + + hessian_GH = fill!(similar(hessian_H), NaN) + gradient_hessian!(gradient, hessian_GH, model, params) + + hessian_FGH = fill!(similar(hessian_H), NaN) + objective_gradient_hessian!(gradient, hessian_FGH, model, params) + + @test hessian_H == hessian_FH == hessian_GH == hessian_FGH + + @test hessian_H ≈ true_hessian rtol = rtol atol = atol end fitmeasure_names_ml = Dict( diff --git a/test/examples/multigroup/build_models.jl b/test/examples/multigroup/build_models.jl index 2f5135176..89467c153 100644 --- a/test/examples/multigroup/build_models.jl +++ b/test/examples/multigroup/build_models.jl @@ -4,39 +4,26 @@ const SEM = StructuralEquationModels # ML estimation 
############################################################################################ -model_g1 = Sem(specification = specification_g1, data = dat_g1, implied = RAMSymbolic) +obs_g1 = SemObservedData(data = dat_g1, observed_vars = SEM.observed_vars(specification_g1)) +obs_g2 = SemObservedData(data = dat_g2, observed_vars = SEM.observed_vars(specification_g2)) -model_g2 = Sem(specification = specification_g2, data = dat_g2, implied = RAM) +model_ml_multigroup = + Sem(SemML(obs_g1, RAMSymbolic(specification_g1)), SemML(obs_g2, RAM(specification_g2))) -@test SEM.params(model_g1.implied.ram_matrices) == SEM.params(model_g2.implied.ram_matrices) - -# test the different constructors -model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer) -model_ml_multigroup2 = SemEnsemble( - specification = partable, - data = dat, - column = :school, - groups = [:Pasteur, :Grant_White], - loss = SemML, -) +@testset "Sem API" begin + @test SEM.nsamples(model_ml_multigroup) == nsamples(obs_g1) + nsamples(obs_g2) + @test SEM.nsem_terms(model_ml_multigroup) == 2 + @test length(SEM.sem_terms(model_ml_multigroup)) == 2 +end # gradients @testset "ml_gradients_multigroup" begin test_gradient(model_ml_multigroup, start_test; atol = 1e-9) - test_gradient(model_ml_multigroup2, start_test; atol = 1e-9) end # fit @testset "ml_solution_multigroup" begin - solution = sem_fit(model_ml_multigroup) - update_estimate!(partable, solution) - test_estimates( - partable, - solution_lav[:parameter_estimates_ml]; - atol = 1e-4, - lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), - ) - solution = sem_fit(model_ml_multigroup2) + solution = sem_fit(semoptimizer, model_ml_multigroup) update_estimate!(partable, solution) test_estimates( partable, @@ -47,24 +34,7 @@ end end @testset "fitmeasures/se_ml" begin - solution_ml = sem_fit(model_ml_multigroup) - test_fitmeasures( - fit_measures(solution_ml), - solution_lav[:fitmeasures_ml]; - rtol = 1e-2, - atol = 1e-7, - ) - update_se_hessian!(partable, solution_ml) - test_estimates( - partable, - solution_lav[:parameter_estimates_ml]; - atol = 1e-3, - col = :se, - lav_col = :se, - lav_groups = Dict(:Pasteur => 1, :Grant_White => 2), - ) - - solution_ml = sem_fit(model_ml_multigroup2) + solution_ml = sem_fit(semoptimizer, model_ml_multigroup) test_fitmeasures( fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; @@ -89,15 +59,19 @@ end partable_s = sort_vars(partable) specification_s = convert(Dict{Symbol, RAMMatrices}, partable_s) +obs_g1_s = SemObservedData( + data = dat_g1, + observed_vars = SEM.observed_vars(specification_s[:Pasteur]), +) +obs_g2_s = SemObservedData( + data = dat_g2, + observed_vars = SEM.observed_vars(specification_s[:Grant_White]), +) -specification_g1_s = specification_s[:Pasteur] -specification_g2_s = specification_s[:Grant_White] - -model_g1 = Sem(specification = specification_g1_s, data = dat_g1, implied = RAMSymbolic) - -model_g2 = Sem(specification = specification_g2_s, data = dat_g2, implied = RAM) - -model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer) +model_ml_multigroup = Sem( + SemML(obs_g1_s, RAMSymbolic(specification_s[:Pasteur])), + SemML(obs_g2_s, RAM(specification_s[:Grant_White])), +) # gradients @testset "ml_gradients_multigroup | sorted" begin @@ -113,7 +87,7 @@ grad_fd = FiniteDiff.finite_difference_gradient( # fit @testset "ml_solution_multigroup | sorted" begin - solution = sem_fit(model_ml_multigroup) + solution = sem_fit(semoptimizer, model_ml_multigroup) update_estimate!(partable_s, 
     test_estimates(
         partable_s,
@@ -124,7 +98,7 @@ grad_fd = FiniteDiff.finite_difference_gradient(
 end
 
 @testset "fitmeasures/se_ml | sorted" begin
-    solution_ml = sem_fit(model_ml_multigroup)
+    solution_ml = sem_fit(semoptimizer, model_ml_multigroup)
     test_fitmeasures(
         fit_measures(solution_ml),
         solution_lav[:fitmeasures_ml];
@@ -144,28 +118,26 @@ end
 end
 
 @testset "sorted | LowerTriangular A" begin
-    @test implied(model_ml_multigroup.sems[2]).A isa LowerTriangular
+    @test implied(SEM.sem_terms(model_ml_multigroup)[2]).A isa LowerTriangular
 end
 
 ############################################################################################
 # ML estimation - user defined loss function
 ############################################################################################
 
-struct UserSemML <: SemLossFunction
+struct UserSemML{O, I} <: SemLoss{O, I}
     hessianeval::ExactHessian
-    UserSemML() = new(ExactHessian())
-end
+    observed::O
+    implied::I
 
-############################################################################################
-### functors
-############################################################################################
-
-using LinearAlgebra: isposdef, logdet, tr, inv
+    UserSemML(observed::SemObserved, implied::SemImplied) =
+        new{typeof(observed), typeof(implied)}(ExactHessian(), observed, implied)
+end
 
-function SEM.objective(ml::UserSemML, model::AbstractSem, params)
-    Σ = implied(model).Σ
-    Σₒ = SEM.obs_cov(observed(model))
+function SEM.objective(ml::UserSemML, params)
+    Σ = implied(ml).Σ
+    Σₒ = SEM.obs_cov(observed(ml))
     if !isposdef(Σ)
         return Inf
     else
@@ -174,24 +146,18 @@ end
 
 # models
-model_g1 = Sem(specification = specification_g1, data = dat_g1, implied = RAMSymbolic)
-
-model_g2 = SemFiniteDiff(
-    specification = specification_g2,
-    data = dat_g2,
-    implied = RAMSymbolic,
-    loss = UserSemML(),
+model_ml_multigroup = Sem(
+    SemML(obs_g1, RAMSymbolic(specification_g1)),
+    SEM.FiniteDiffWrapper(UserSemML(obs_g2, RAMSymbolic(specification_g2))),
 )
-
-model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer)
-
 @testset "gradients_user_defined_loss" begin
     test_gradient(model_ml_multigroup, start_test; atol = 1e-9)
 end
 
 # fit
 @testset "solution_user_defined_loss" begin
-    solution = sem_fit(model_ml_multigroup)
+    solution = sem_fit(semoptimizer, model_ml_multigroup)
     update_estimate!(partable, solution)
     test_estimates(
         partable,
@@ -205,28 +171,17 @@ end
 # GLS estimation
 ############################################################################################
 
-model_ls_g1 = Sem(
-    specification = specification_g1,
-    data = dat_g1,
-    implied = RAMSymbolic,
-    loss = SemWLS,
-)
-
-model_ls_g2 = Sem(
-    specification = specification_g2,
-    data = dat_g2,
-    implied = RAMSymbolic,
-    loss = SemWLS,
+model_ls_multigroup = Sem(
+    SemWLS(obs_g1, RAMSymbolic(specification_g1, vech = true)),
+    SemWLS(obs_g2, RAMSymbolic(specification_g2, vech = true)),
 )
 
-model_ls_multigroup = SemEnsemble(model_ls_g1, model_ls_g2; optimizer = semoptimizer)
-
 @testset "ls_gradients_multigroup" begin
     test_gradient(model_ls_multigroup, start_test; atol = 1e-9)
 end
 
 @testset "ls_solution_multigroup" begin
-    solution = sem_fit(model_ls_multigroup)
+    solution = sem_fit(semoptimizer, model_ls_multigroup)
     update_estimate!(partable, solution)
     test_estimates(
         partable,
@@ -237,7 +192,7 @@ end
 end
 
 @testset "fitmeasures/se_ls" begin
-    solution_ls = sem_fit(model_ls_multigroup)
+    solution_ls = sem_fit(semoptimizer, model_ls_multigroup)
     test_fitmeasures(
         fit_measures(solution_ls),
         solution_lav[:fitmeasures_ls];
@@ -262,35 +217,21 @@ end
 ############################################################################################
 
 if !isnothing(specification_miss_g1)
-    model_g1 = Sem(
-        specification = specification_miss_g1,
-        observed = SemObservedMissing,
-        loss = SemFIML,
-        data = dat_miss_g1,
-        implied = RAM,
-        optimizer = SemOptimizerEmpty(),
-        meanstructure = true,
-    )
-
-    model_g2 = Sem(
-        specification = specification_miss_g2,
-        observed = SemObservedMissing,
-        loss = SemFIML,
-        data = dat_miss_g2,
-        implied = RAM,
-        optimizer = SemOptimizerEmpty(),
-        meanstructure = true,
-    )
-
-    model_ml_multigroup = SemEnsemble(model_g1, model_g2; optimizer = semoptimizer)
-    model_ml_multigroup2 = SemEnsemble(
-        specification = partable_miss,
-        data = dat_missing,
-        column = :school,
-        groups = [:Pasteur, :Grant_White],
-        loss = SemFIML,
-        observed = SemObservedMissing,
-        meanstructure = true,
+    model_ml_multigroup = Sem(
+        SemFIML(
+            SemObservedMissing(
+                data = dat_miss_g1,
+                observed_vars = SEM.observed_vars(specification_miss_g1),
+            ),
+            RAM(specification_miss_g1),
+        ),
+        SemFIML(
+            SemObservedMissing(
+                data = dat_miss_g2,
+                observed_vars = SEM.observed_vars(specification_miss_g2),
+            ),
+            RAM(specification_miss_g2),
+        ),
     )
 
 ############################################################################################
@@ -319,19 +260,10 @@ if !isnothing(specification_miss_g1)
 
     @testset "fiml_gradients_multigroup" begin
         test_gradient(model_ml_multigroup, start_test; atol = 1e-7)
-        test_gradient(model_ml_multigroup2, start_test; atol = 1e-7)
     end
 
     @testset "fiml_solution_multigroup" begin
-        solution = sem_fit(model_ml_multigroup)
-        update_estimate!(partable_miss, solution)
-        test_estimates(
-            partable_miss,
-            solution_lav[:parameter_estimates_fiml];
-            atol = 1e-4,
-            lav_groups = Dict(:Pasteur => 1, :Grant_White => 2),
-        )
-        solution = sem_fit(model_ml_multigroup2)
+        solution = sem_fit(semoptimizer, model_ml_multigroup)
         update_estimate!(partable_miss, solution)
         test_estimates(
             partable_miss,
@@ -342,30 +274,14 @@ if !isnothing(specification_miss_g1)
     end
 
     @testset "fitmeasures/se_fiml" begin
-        solution = sem_fit(model_ml_multigroup)
+        solution = sem_fit(semoptimizer, model_ml_multigroup)
         test_fitmeasures(
             fit_measures(solution),
             solution_lav[:fitmeasures_fiml];
             rtol = 1e-3,
             atol = 0,
         )
-        update_se_hessian!(partable_miss, solution)
-        test_estimates(
-            partable_miss,
-            solution_lav[:parameter_estimates_fiml];
-            atol = 1e-3,
-            col = :se,
-            lav_col = :se,
-            lav_groups = Dict(:Pasteur => 1, :Grant_White => 2),
-        )
-        solution = sem_fit(model_ml_multigroup2)
-        test_fitmeasures(
-            fit_measures(solution),
-            solution_lav[:fitmeasures_fiml];
-            rtol = 1e-3,
-            atol = 0,
-        )
         update_se_hessian!(partable_miss, solution)
         test_estimates(
             partable_miss,
diff --git a/test/examples/multigroup/multigroup.jl b/test/examples/multigroup/multigroup.jl
index 7dd871ac2..85ab7e162 100644
--- a/test/examples/multigroup/multigroup.jl
+++ b/test/examples/multigroup/multigroup.jl
@@ -1,5 +1,5 @@
 using StructuralEquationModels, Test, FiniteDiff, Suppressor
-using LinearAlgebra: diagind, LowerTriangular
+using LinearAlgebra: diagind, LowerTriangular, isposdef, logdet, tr
 
 const SEM = StructuralEquationModels
 
@@ -86,7 +86,8 @@ start_test = [
     fill(0.05, 3)
     fill(0.01, 3)
 ]
-semoptimizer = SemOptimizerOptim
+
+semoptimizer = SemOptimizerOptim()
 
 @testset "RAMMatrices | constructor | Optim" begin
     include("build_models.jl")
@@ -169,7 +170,6 @@ start_test = [
     0.01
     0.05
 ]
-semoptimizer = SemOptimizerOptim
 
 @testset "Graph → Partable → RAMMatrices | constructor | Optim" begin
     include("build_models.jl")
diff --git a/test/examples/political_democracy/by_parts.jl b/test/examples/political_democracy/by_parts.jl
index 88f98ded2..745e0487c 100644
--- a/test/examples/political_democracy/by_parts.jl
+++ b/test/examples/political_democracy/by_parts.jl
@@ -2,103 +2,83 @@
 ### models w.o. meanstructure
 ############################################################################################
 
-# observed ---------------------------------------------------------------------------------
-observed = SemObservedData(specification = spec, data = dat)
+semoptimizer = SemOptimizer(engine = opt_engine)
 
-# implied
-implied_ram = RAM(specification = spec)
+model_ml = Sem(specification = spec, data = dat)
+@test SEM.params(model_ml) == SEM.params(spec)
 
-implied_ram_sym = RAMSymbolic(specification = spec)
+model_ls_sym =
+    Sem(specification = spec, data = dat, implied = RAMSymbolic, vech = true, loss = SemWLS)
 
-# loss functions ---------------------------------------------------------------------------
-ml = SemML(observed = observed)
+model_ml_sym = Sem(specification = spec, data = dat, implied = RAMSymbolic)
 
-wls = SemWLS(observed = observed)
-
-ridge = SemRidge(α_ridge = 0.001, which_ridge = 16:20, nparams = 31)
-
-constant = SemConstant(constant_loss = 3.465)
-
-# loss -------------------------------------------------------------------------------------
-loss_ml = SemLoss(ml)
-
-loss_wls = SemLoss(wls)
-
-# optimizer -------------------------------------------------------------------------------------
-optimizer_obj = SemOptimizer(engine = opt_engine)
-
-# models -----------------------------------------------------------------------------------
-
-model_ml = Sem(observed, implied_ram, loss_ml)
-
-model_ls_sym = Sem(observed, RAMSymbolic(specification = spec, vech = true), loss_wls)
-
-model_ml_sym = Sem(observed, implied_ram_sym, loss_ml)
-
-model_ridge = Sem(observed, implied_ram, SemLoss(ml, ridge))
+model_ml_ridge = Sem(
+    specification = spec,
+    data = dat,
+    loss = (SemML, SemRidge),
+    α_ridge = 0.001,
+    which_ridge = 16:20,
+)
 
-model_constant = Sem(observed, implied_ram, SemLoss(ml, constant))
+model_ml_const = Sem(
+    specification = spec,
+    data = dat,
+    loss = (SemML, SemConstant),
+    constant_loss = 3.465,
+)
 
-model_ml_weighted =
-    Sem(observed, implied_ram, SemLoss(ml; loss_weights = [nsamples(model_ml)]))
+model_ml_weighted = Sem(SemML(SemObservedData(data = dat), RAM(spec)) => nsamples(model_ml))
 
 ############################################################################################
 ### test gradients
 ############################################################################################
 
-models =
-    [model_ml, model_ls_sym, model_ridge, model_constant, model_ml_sym, model_ml_weighted]
-model_names = ["ml", "ls_sym", "ridge", "constant", "ml_sym", "ml_weighted"]
+models = Dict(
+    "ml" => model_ml,
+    "ls_sym" => model_ls_sym,
+    "ml_ridge" => model_ml_ridge,
+    "ml_const" => model_ml_const,
+    "ml_sym" => model_ml_sym,
+    "ml_weighted" => model_ml_weighted,
+)
 
-for (model, name) in zip(models, model_names)
-    try
-        @testset "$(name)_gradient" begin
-            test_gradient(model, start_test; rtol = 1e-9)
-        end
-    catch
-    end
+@testset "$(id)_gradient" for (id, model) in pairs(models)
+    test_gradient(model, start_test; rtol = 1e-9)
 end
 
 ############################################################################################
 ### test solution
 ############################################################################################
 
-models = [model_ml, model_ls_sym, model_ml_sym, model_constant]
-model_names = ["ml", "ls_sym", "ml_sym", "constant"]
-solution_names = Symbol.("parameter_estimates_" .* ["ml", "ls", "ml", "ml"])
-
-for (model, name, solution_name) in zip(models, model_names, solution_names)
-    try
-        @testset "$(name)_solution" begin
-            solution = sem_fit(optimizer_obj, model)
-            update_estimate!(partable, solution)
-            test_estimates(partable, solution_lav[solution_name]; atol = 1e-2)
-        end
-    catch
-    end
+@testset "$(id)_solution" for id in ["ml", "ls_sym", "ml_sym", "ml_const"]
+    model = models[id]
+    solution = sem_fit(semoptimizer, model)
+    sol_name = Symbol("parameter_estimates_", replace(id, r"_.+$" => ""))
+    update_estimate!(partable, solution)
+    test_estimates(partable, solution_lav[sol_name]; atol = 1e-2)
 end
 
 @testset "ridge_solution" begin
-    solution_ridge = sem_fit(optimizer_obj, model_ridge)
-    solution_ml = sem_fit(optimizer_obj, model_ml)
-    # solution_ridge_id = sem_fit(optimizer_obj, model_ridge_id)
-    @test solution_ridge.minimum < solution_ml.minimum + 1
+    solution_ridge = sem_fit(semoptimizer, model_ml_ridge)
+    solution_ml = sem_fit(semoptimizer, model_ml)
+    # solution_ridge_id = sem_fit(model_ridge_id)
+    @test abs(solution_ridge.minimum - solution_ml.minimum) < 1
 end
 
 # test constant objective value
 @testset "constant_objective_and_gradient" begin
-    @test (objective!(model_constant, start_test) - 3.465) ≈
+    @test (objective!(model_ml_const, start_test) - 3.465) ≈
           objective!(model_ml, start_test)
     grad = similar(start_test)
     grad2 = similar(start_test)
-    gradient!(grad, model_constant, start_test)
+    gradient!(grad, model_ml_const, start_test)
     gradient!(grad2, model_ml, start_test)
     @test grad ≈ grad2
 end
 
 @testset "ml_solution_weighted" begin
-    solution_ml = sem_fit(optimizer_obj, model_ml)
-    solution_ml_weighted = sem_fit(optimizer_obj, model_ml_weighted)
+    solution_ml = sem_fit(semoptimizer, model_ml)
+    solution_ml_weighted = sem_fit(semoptimizer, model_ml_weighted)
     @test solution(solution_ml) ≈ solution(solution_ml_weighted) rtol = 1e-3
     @test nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈
           StructuralEquationModels.minimum(solution_ml_weighted) rtol = 1e-6
@@ -109,7 +89,7 @@ end
 ############################################################################################
 
 @testset "fitmeasures/se_ml" begin
-    solution_ml = sem_fit(optimizer_obj, model_ml)
+    solution_ml = sem_fit(semoptimizer, model_ml)
     test_fitmeasures(fit_measures(solution_ml), solution_lav[:fitmeasures_ml]; atol = 1e-3)
 
     update_se_hessian!(partable, solution_ml)
@@ -123,7 +103,7 @@ end
 end
 
 @testset "fitmeasures/se_ls" begin
-    solution_ls = sem_fit(optimizer_obj, model_ls_sym)
+    solution_ls = sem_fit(semoptimizer, model_ls_sym)
     fm = fit_measures(solution_ls)
     test_fitmeasures(
         fm,
@@ -131,7 +111,7 @@ end
         atol = 1e-3,
         fitmeasure_names = fitmeasure_names_ls,
     )
-    @test (fm[:AIC] === missing) & (fm[:BIC] === missing) & (fm[:minus2ll] === missing)
+    @test ismissing(fm[:AIC]) && ismissing(fm[:BIC]) && ismissing(fm[:minus2ll])
 
     @suppress update_se_hessian!(partable, solution_ls)
     test_estimates(
@@ -150,22 +130,22 @@ end
 if opt_engine == :Optim
     using Optim, LineSearches
 
-    optimizer_obj = SemOptimizer(
-        engine = opt_engine,
-        algorithm = Newton(;
-            linesearch = BackTracking(order = 3),
-            alphaguess = InitialHagerZhang(),
-        ),
+    model_ls = Sem(
+        data = dat,
+        specification = spec,
+        implied = RAMSymbolic,
+        loss = SemWLS,
+        vech = true,
+        hessian = true,
     )
 
-    implied_sym_hessian_vech =
-        RAMSymbolic(specification = spec, vech = true, hessian = true)
-
-    implied_sym_hessian = RAMSymbolic(specification = spec, hessian = true)
-
-    model_ls = Sem(observed, implied_sym_hessian_vech, loss_wls)
-
-    model_ml = Sem(observed, implied_sym_hessian, loss_ml)
+    model_ml = Sem(
+        data = dat,
+        specification = spec,
+        implied = RAMSymbolic,
+        loss = SemML,
+        hessian = true,
+    )
 
     @testset "ml_hessians" begin
         test_hessian(model_ml, start_test; atol = 1e-4)
@@ -176,13 +156,23 @@ if opt_engine == :Optim
     end
 
     @testset "ml_solution_hessian" begin
-        solution = sem_fit(optimizer_obj, model_ml)
+        solution = sem_fit(SemOptimizer(engine = :Optim, algorithm = Newton()), model_ml)
+
         update_estimate!(partable, solution)
         test_estimates(partable, solution_lav[:parameter_estimates_ml]; atol = 1e-3)
     end
 
     @testset "ls_solution_hessian" begin
-        solution = sem_fit(optimizer_obj, model_ls)
+        solution = sem_fit(
+            SemOptimizer(
+                engine = :Optim,
+                algorithm = Newton(
+                    linesearch = BackTracking(order = 3),
+                    alphaguess = InitialHagerZhang(),
+                ),
+            ),
+            model_ls,
+        )
         update_estimate!(partable, solution)
         test_estimates(
             partable,
@@ -197,69 +187,47 @@ end
 ### meanstructure
 ############################################################################################
 
-# observed ---------------------------------------------------------------------------------
-observed = SemObservedData(specification = spec_mean, data = dat, meanstructure = true)
-
-# implied
-implied_ram = RAM(specification = spec_mean, meanstructure = true)
-
-implied_ram_sym = RAMSymbolic(specification = spec_mean, meanstructure = true)
-
-# loss functions ---------------------------------------------------------------------------
-ml = SemML(observed = observed, meanstructure = true)
-
-wls = SemWLS(observed = observed, meanstructure = true)
-
-# loss -------------------------------------------------------------------------------------
-loss_ml = SemLoss(ml)
-
-loss_wls = SemLoss(wls)
-
-# optimizer -------------------------------------------------------------------------------------
-optimizer_obj = SemOptimizer(engine = opt_engine)
+# models
+model_ls = Sem(
+    data = dat,
+    specification = spec_mean,
+    implied = RAMSymbolic,
+    loss = SemWLS,
+    vech = true,
+)
 
-# models -----------------------------------------------------------------------------------
-model_ml = Sem(observed, implied_ram, loss_ml)
+model_ml = Sem(data = dat, specification = spec_mean, implied = RAM, loss = SemML)
 
-model_ls = Sem(
-    observed,
-    RAMSymbolic(specification = spec_mean, meanstructure = true, vech = true),
-    loss_wls,
+model_ml_cov = Sem(
+    specification = spec_mean,
+    observed = SemObservedCovariance,
+    obs_cov = cov(Matrix(dat)),
+    obs_mean = vcat(mean(Matrix(dat), dims = 1)...),
+    observed_vars = Symbol.(names(dat)),
+    meanstructure = true,
+    nsamples = 75,
 )
 
-model_ml_sym = Sem(observed, implied_ram_sym, loss_ml)
+model_ml_sym =
+    Sem(data = dat, specification = spec_mean, implied = RAMSymbolic, loss = SemML)
 
 ############################################################################################
 ### test gradients
 ############################################################################################
 
-models = [model_ml, model_ls, model_ml_sym]
-model_names = ["ml", "ls_sym", "ml_sym"]
+models = Dict("ml" => model_ml, "ls_sym" => model_ls, "ml_sym" => model_ml_sym)
 
-for (model, name) in zip(models, model_names)
-    try
-        @testset "$(name)_gradient_mean" begin
-            test_gradient(model, start_test_mean; rtol = 1e-9)
-        end
-    catch
-    end
+@testset "$(id)_gradient_mean" for (id, model) in pairs(models)
+    test_gradient(model, start_test_mean; rtol = 1e-9)
 end
 
 ############################################################################################
 ### test solution
 ############################################################################################
 
-solution_names = Symbol.("parameter_estimates_" .* ["ml", "ls", "ml"] .* "_mean")
-
-for (model, name, solution_name) in zip(models, model_names, solution_names)
-    try
-        @testset "$(name)_solution_mean" begin
-            solution = sem_fit(optimizer_obj, model)
-            update_estimate!(partable_mean, solution)
-            test_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2)
-        end
-    catch
-    end
+@testset "$(id)_solution_mean" for (id, model) in pairs(models)
+    solution = sem_fit(semoptimizer, model, start_val = start_test_mean)
+    update_estimate!(partable_mean, solution)
+    sol_name = Symbol("parameter_estimates_", replace(id, r"_.+$" => ""), "_mean")
+    test_estimates(partable_mean, solution_lav[sol_name]; atol = 1e-2)
 end
 
 ############################################################################################
@@ -267,7 +235,7 @@ end
 ############################################################################################
 
 @testset "fitmeasures/se_ml_mean" begin
-    solution_ml = sem_fit(optimizer_obj, model_ml)
+    solution_ml = sem_fit(semoptimizer, model_ml)
     test_fitmeasures(
         fit_measures(solution_ml),
         solution_lav[:fitmeasures_ml_mean];
@@ -285,7 +253,7 @@ end
 end
 
 @testset "fitmeasures/se_ls_mean" begin
-    solution_ls = sem_fit(optimizer_obj, model_ls)
+    solution_ls = sem_fit(semoptimizer, model_ls)
     fm = fit_measures(solution_ls)
     test_fitmeasures(
         fm,
@@ -293,7 +261,7 @@ end
         atol = 1e-3,
         fitmeasure_names = fitmeasure_names_ls,
    )
-    @test (fm[:AIC] === missing) & (fm[:BIC] === missing) & (fm[:minus2ll] === missing)
+    @test ismissing(fm[:AIC]) && ismissing(fm[:BIC]) && ismissing(fm[:minus2ll])
 
     @suppress update_se_hessian!(partable_mean, solution_ls)
     test_estimates(
@@ -309,15 +277,22 @@ end
 ### fiml
 ############################################################################################
 
-observed = SemObservedMissing(specification = spec_mean, data = dat_missing)
-
-fiml = SemFIML(observed = observed, specification = spec_mean)
-
-loss_fiml = SemLoss(fiml)
-
-model_ml = Sem(observed, implied_ram, loss_fiml)
+# models
+model_ml = Sem(
+    data = dat_missing,
+    observed = SemObservedMissing,
+    specification = spec_mean,
+    implied = RAM,
+    loss = SemFIML,
+)
 
-model_ml_sym = Sem(observed, implied_ram_sym, loss_fiml)
+model_ml_sym = Sem(
+    data = dat_missing,
+    observed = SemObservedMissing,
+    specification = spec_mean,
+    implied = RAMSymbolic,
+    loss = SemFIML,
+)
 
 ############################################################################################
 ### test gradients
@@ -336,13 +311,13 @@ end
 ############################################################################################
 
 @testset "fiml_solution" begin
-    solution = sem_fit(optimizer_obj, model_ml)
+    solution = sem_fit(semoptimizer, model_ml)
     update_estimate!(partable_mean, solution)
     test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2)
 end
 
 @testset "fiml_solution_symbolic" begin
-    solution = sem_fit(optimizer_obj, model_ml_sym)
+    solution = sem_fit(semoptimizer, model_ml_sym, start_val = start_test_mean)
     update_estimate!(partable_mean, solution)
     test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2)
 end
@@ -352,7 +327,7 @@ end
 ############################################################################################
 
 @testset "fitmeasures/se_fiml" begin
-    solution_ml = sem_fit(optimizer_obj, model_ml)
+    solution_ml = sem_fit(semoptimizer, model_ml)
     test_fitmeasures(
         fit_measures(solution_ml),
         solution_lav[:fitmeasures_fiml];
@@ -363,7 +338,7 @@ end
     test_estimates(
         partable_mean,
         solution_lav[:parameter_estimates_fiml];
-        atol = 1e-3,
+        atol = 0.002,
         col = :se,
         lav_col = :se,
     )
diff --git a/test/examples/political_democracy/constraints.jl b/test/examples/political_democracy/constraints.jl
index fb2116023..1939fd11b 100644
--- a/test/examples/political_democracy/constraints.jl
+++ b/test/examples/political_democracy/constraints.jl
@@ -50,7 +50,7 @@ end
     @test solution_constrained.solution[31] * solution_constrained.solution[30] >=
           (0.6 - 1e-8)
-    @test all(abs.(solution_constrained.solution) .< 10)
-    @test solution_constrained.optimization_result.result[3] == :FTOL_REACHED
+    @test all(p -> abs(p) < 10, solution_constrained.solution)
+    @test solution_constrained.optimization_result.result[3] == :FTOL_REACHED skip = true
     @test solution_constrained.minimum <= 21.21 + 0.01
 end
diff --git a/test/examples/political_democracy/constructor.jl b/test/examples/political_democracy/constructor.jl
index bbeb0c648..b4588dbeb 100644
--- a/test/examples/political_democracy/constructor.jl
+++ b/test/examples/political_democracy/constructor.jl
@@ -1,6 +1,3 @@
-using Statistics: cov, mean
-using Random, NLopt
-
 ############################################################################################
 ### models w.o. meanstructure
 ############################################################################################
@@ -8,21 +5,22 @@
 semoptimizer = SemOptimizer(engine = opt_engine)
 
 model_ml = Sem(specification = spec, data = dat)
-@test SEM.params(model_ml.implied.ram_matrices) == SEM.params(spec)
+@test SEM.params(model_ml) == SEM.params(spec)
 
 model_ml_cov = Sem(
     specification = spec,
     observed = SemObservedCovariance,
     obs_cov = cov(Matrix(dat)),
-    obs_colnames = Symbol.(names(dat)),
+    observed_vars = Symbol.(names(dat)),
     nsamples = 75,
 )
 
-model_ls_sym = Sem(specification = spec, data = dat, implied = RAMSymbolic, loss = SemWLS)
+model_ls_sym =
+    Sem(specification = spec, data = dat, implied = RAMSymbolic, vech = true, loss = SemWLS)
 
 model_ml_sym = Sem(specification = spec, data = dat, implied = RAMSymbolic)
 
-model_ridge = Sem(
+model_ml_ridge = Sem(
     specification = spec,
     data = dat,
     loss = (SemML, SemRidge),
@@ -30,7 +28,7 @@ model_ridge = Sem(
     which_ridge = 16:20,
 )
 
-model_constant = Sem(
+model_ml_const = Sem(
     specification = spec,
     data = dat,
     loss = (SemML, SemConstant),
@@ -38,65 +36,52 @@
 )
 
 model_ml_weighted =
-    Sem(specification = partable, data = dat, loss_weights = (nsamples(model_ml),))
+    Sem(SemML(SemObservedData(data = dat), RAMSymbolic(spec)) => nsamples(model_ml))
 
 ############################################################################################
 ### test gradients
 ############################################################################################
 
-models = [
-    model_ml,
-    model_ml_cov,
-    model_ls_sym,
-    model_ridge,
-    model_constant,
-    model_ml_sym,
-    model_ml_weighted,
-]
-model_names = ["ml", "ml_cov", "ls_sym", "ridge", "constant", "ml_sym", "ml_weighted"]
-
-for (model, name) in zip(models, model_names)
-    try
-        @testset "$(name)_gradient" begin
-            test_gradient(model, start_test; rtol = 1e-9)
-        end
-    catch
-    end
+models = Dict(
+    "ml" => model_ml,
"ml_cov" => model_ml_cov, + "ls_sym" => model_ls_sym, + "ridge" => model_ml_ridge, + "ml_const" => model_ml_const, + "ml_sym" => model_ml_sym, + "ml_weighted" => model_ml_weighted, +) + +@testset "$(id)_gradient" for (id, model) in pairs(models) + test_gradient(model, start_test; rtol = 1e-9) end ############################################################################################ ### test solution ############################################################################################ -models = [model_ml, model_ml_cov, model_ls_sym, model_ml_sym, model_constant] -model_names = ["ml", "ml_cov", "ls_sym", "ml_sym", "constant"] -solution_names = Symbol.("parameter_estimates_" .* ["ml", "ml", "ls", "ml", "ml"]) - -for (model, name, solution_name) in zip(models, model_names, solution_names) - try - @testset "$(name)_solution" begin - solution = sem_fit(semoptimizer, model) - update_estimate!(partable, solution) - test_estimates(partable, solution_lav[solution_name]; atol = 1e-2) - end - catch - end +@testset "$(id)_solution" for id in ["ml", "ml_cov", "ls_sym", "ml_sym", "ml_const"] + model = models[id] + solution = sem_fit(semoptimizer, model) + sol_name = Symbol("parameter_estimates_", replace(id, r"_.+$" => "")) + update_estimate!(partable, solution) + test_estimates(partable, solution_lav[sol_name]; atol = 1e-2) end @testset "ridge_solution" begin - solution_ridge = sem_fit(semoptimizer, model_ridge) + solution_ridge = sem_fit(semoptimizer, model_ml_ridge) solution_ml = sem_fit(semoptimizer, model_ml) - # solution_ridge_id = sem_fit(semoptimizer, model_ridge_id) + # solution_ridge_id = sem_fit(model_ridge_id) @test abs(solution_ridge.minimum - solution_ml.minimum) < 1 end # test constant objective value @testset "constant_objective_and_gradient" begin - @test (objective!(model_constant, start_test) - 3.465) ≈ + @test (objective!(model_ml_const, start_test) - 3.465) ≈ objective!(model_ml, start_test) grad = similar(start_test) grad2 = similar(start_test) - gradient!(grad, model_constant, start_test) + gradient!(grad, model_ml_const, start_test) gradient!(grad2, model_ml, start_test) @test grad ≈ grad2 end @@ -104,12 +89,9 @@ end @testset "ml_solution_weighted" begin solution_ml = sem_fit(semoptimizer, model_ml) solution_ml_weighted = sem_fit(semoptimizer, model_ml_weighted) - @test isapprox(solution(solution_ml), solution(solution_ml_weighted), rtol = 1e-3) - @test isapprox( - nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml), - StructuralEquationModels.minimum(solution_ml_weighted), - rtol = 1e-6, - ) + @test solution(solution_ml) ≈ solution(solution_ml_weighted) rtol = 1e-3 + @test nsamples(model_ml) * StructuralEquationModels.minimum(solution_ml) ≈ + StructuralEquationModels.minimum(solution_ml_weighted) rtol = 1e-6 end ############################################################################################ @@ -173,13 +155,13 @@ end model_ml, data = rand(model_ml, params, 1_000_000), specification = spec, - obs_colnames = colnames, + observed_vars = colnames, ) model_ml_sym_new = replace_observed( model_ml_sym, data = rand(model_ml_sym, params, 1_000_000), specification = spec, - obs_colnames = colnames, + observed_vars = colnames, ) # fit models sol_ml = solution(sem_fit(semoptimizer, model_ml_new)) @@ -197,23 +179,19 @@ if opt_engine == :Optim using Optim, LineSearches model_ls = Sem( - specification = spec, data = dat, - implied = RAMSymbolic, + specification = spec, + observed = SemObservedData, + implied = RAMSymbolic(spec, vech = true, hessian = 
         loss = SemWLS,
-        hessian = true,
-        algorithm = Newton(;
-            linesearch = BackTracking(order = 3),
-            alphaguess = InitialHagerZhang(),
-        ),
     )
 
     model_ml = Sem(
-        specification = spec,
         data = dat,
-        implied = RAMSymbolic,
-        hessian = true,
-        algorithm = Newton(),
+        specification = spec,
+        observed = SemObservedData,
+        implied = RAMSymbolic(spec, hessian = true),
+        loss = SemML,
     )
 
     @testset "ml_hessians" begin
@@ -225,13 +203,23 @@ if opt_engine == :Optim
     end
 
     @testset "ml_solution_hessian" begin
-        solution = sem_fit(semoptimizer, model_ml)
+        solution = sem_fit(SemOptimizer(engine = :Optim, algorithm = Newton()), model_ml)
+
        update_estimate!(partable, solution)
         test_estimates(partable, solution_lav[:parameter_estimates_ml]; atol = 1e-3)
     end
 
     @testset "ls_solution_hessian" begin
-        solution = sem_fit(semoptimizer, model_ls)
+        solution = sem_fit(
+            SemOptimizer(
+                engine = :Optim,
+                algorithm = Newton(
+                    linesearch = BackTracking(order = 3),
+                    alphaguess = InitialHagerZhang(),
+                ),
+            ),
+            model_ls,
+        )
         update_estimate!(partable, solution)
         test_estimates(
             partable,
@@ -252,6 +240,7 @@ model_ls = Sem(
     specification = spec_mean,
     data = dat,
     implied = RAMSymbolic,
+    vech = true,
     loss = SemWLS,
     meanstructure = true,
 )
@@ -263,7 +252,7 @@ model_ml_cov = Sem(
     observed = SemObservedCovariance,
     obs_cov = cov(Matrix(dat)),
     obs_mean = vcat(mean(Matrix(dat), dims = 1)...),
-    obs_colnames = Symbol.(names(dat)),
+    observed_vars = Symbol.(names(dat)),
     meanstructure = true,
     nsamples = 75,
 )
@@ -275,33 +264,26 @@ model_ml_sym =
 ### test gradients
 ############################################################################################
 
-models = [model_ml, model_ml_cov, model_ls, model_ml_sym]
-model_names = ["ml", "ml_cov", "ls_sym", "ml_sym"]
+models = Dict(
+    "ml" => model_ml,
+    "ml_cov" => model_ml_cov,
+    "ls_sym" => model_ls,
+    "ml_sym" => model_ml_sym,
+)
 
-for (model, name) in zip(models, model_names)
-    try
-        @testset "$(name)_gradient_mean" begin
-            test_gradient(model, start_test_mean; rtol = 1e-9)
-        end
-    catch
-    end
+@testset "$(id)_gradient_mean" for (id, model) in pairs(models)
+    test_gradient(model, start_test_mean; rtol = 1e-9)
 end
 
 ############################################################################################
 ### test solution
 ############################################################################################
 
-solution_names = Symbol.("parameter_estimates_" .* ["ml", "ml", "ls", "ml"] .* "_mean")
-
-for (model, name, solution_name) in zip(models, model_names, solution_names)
-    try
-        @testset "$(name)_solution_mean" begin
-            solution = sem_fit(semoptimizer, model)
-            update_estimate!(partable_mean, solution)
-            test_estimates(partable_mean, solution_lav[solution_name]; atol = 1e-2)
-        end
-    catch
-    end
+@testset "$(id)_solution_mean" for (id, model) in pairs(models)
+    solution = sem_fit(semoptimizer, model, start_val = start_test_mean)
+    update_estimate!(partable_mean, solution)
+    sol_name = Symbol("parameter_estimates_", replace(id, r"_.+$" => ""), "_mean")
+    test_estimates(partable_mean, solution_lav[sol_name]; atol = 1e-2)
 end
 
 ############################################################################################
@@ -370,14 +352,14 @@ end
         model_ml,
         data = rand(model_ml, params, 1_000_000),
         specification = spec,
-        obs_colnames = colnames,
+        observed_vars = colnames,
         meanstructure = true,
     )
     model_ml_sym_new = replace_observed(
         model_ml_sym,
         data = rand(model_ml_sym, params, 1_000_000),
         specification = spec,
-        obs_colnames = colnames,
+        observed_vars = colnames,
         meanstructure = true,
     )
     # fit models
@@ -433,7 +415,7 @@ end
 end
 
 @testset "fiml_solution_symbolic" begin
-    solution = sem_fit(semoptimizer, model_ml_sym)
+    solution = sem_fit(semoptimizer, model_ml_sym, start_val = start_test_mean)
     update_estimate!(partable_mean, solution)
     test_estimates(partable_mean, solution_lav[:parameter_estimates_fiml]; atol = 1e-2)
 end
diff --git a/test/examples/political_democracy/political_democracy.jl b/test/examples/political_democracy/political_democracy.jl
index 9d026fb28..8649cb22a 100644
--- a/test/examples/political_democracy/political_democracy.jl
+++ b/test/examples/political_democracy/political_democracy.jl
@@ -1,4 +1,6 @@
 using StructuralEquationModels, Test, Suppressor, FiniteDiff
+using Statistics: cov, mean
+using Random, NLopt
 
 SEM = StructuralEquationModels
diff --git a/test/examples/proximal/l0.jl b/test/examples/proximal/l0.jl
index da20f3901..884b7a565 100644
--- a/test/examples/proximal/l0.jl
+++ b/test/examples/proximal/l0.jl
@@ -1,5 +1,3 @@
-using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators
-
 # load data
 dat = example_data("political_democracy")
diff --git a/test/examples/proximal/lasso.jl b/test/examples/proximal/lasso.jl
index 314453df4..e33898b6f 100644
--- a/test/examples/proximal/lasso.jl
+++ b/test/examples/proximal/lasso.jl
@@ -1,5 +1,3 @@
-using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators
-
 # load data
 dat = example_data("political_democracy")
diff --git a/test/examples/proximal/proximal.jl b/test/examples/proximal/proximal.jl
index 40e72a1ef..84a9162cb 100644
--- a/test/examples/proximal/proximal.jl
+++ b/test/examples/proximal/proximal.jl
@@ -1,3 +1,5 @@
+using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators, Suppressor
+
 @testset "Ridge" begin
     include("ridge.jl")
 end
diff --git a/test/examples/proximal/ridge.jl b/test/examples/proximal/ridge.jl
index 8c0a1df7a..995f1e399 100644
--- a/test/examples/proximal/ridge.jl
+++ b/test/examples/proximal/ridge.jl
@@ -1,5 +1,3 @@
-using StructuralEquationModels, Test, ProximalAlgorithms, ProximalOperators, Suppressor
-
 # load data
 dat = example_data("political_democracy")
diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl
index 6899fe7a7..90a93e076 100644
--- a/test/examples/recover_parameters/recover_parameters_twofact.jl
+++ b/test/examples/recover_parameters/recover_parameters_twofact.jl
@@ -1,5 +1,7 @@
 using StructuralEquationModels, Distributions, Random, Optim, LineSearches
+SEM = StructuralEquationModels
+
 include(
     joinpath(
         chop(dirname(pathof(StructuralEquationModels)), tail = 3),
@@ -53,26 +55,22 @@ start = [
     repeat([0.5], 4)
 ]
 
-implied_ml = RAMSymbolic(; specification = ram_matrices, start_val = start)
+implied_ml = RAMSymbolic(ram_matrices)
 
-implied_ml.Σ_function(implied_ml.Σ, true_val)
+implied_ml.Σ_eval!(implied_ml.Σ, true_val)
 true_dist = MultivariateNormal(implied_ml.Σ)
 
 Random.seed!(1234)
-x = transpose(rand(true_dist, 100_000))
-semobserved = SemObservedData(data = x, specification = nothing)
-
-loss_ml = SemLoss(SemML(; observed = semobserved, nparams = length(start)))
-
-model_ml = Sem(semobserved, implied_ml, loss_ml)
-objective!(model_ml, true_val)
+x = permutedims(rand(true_dist, 10^5), (2, 1))
 
 optimizer = SemOptimizerOptim(
     BFGS(; linesearch = BackTracking(order = 3), alphaguess = InitialHagerZhang()),# m = 100),
     Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8),
 )
-solution_ml = sem_fit(optimizer, model_ml)
+model_ml = Sem(SemML(SemObservedData(data = x, specification = ram_matrices), implied_ml))
+objective!(model_ml, true_val)
+solution_ml = sem_fit(optimizer, model_ml, start_val = start)
 
-@test true_val ≈ solution(solution_ml) atol = 0.05
+@test solution(solution_ml) ≈ true_val atol = 0.05
diff --git a/test/unit_tests/data_input_formats.jl b/test/unit_tests/data_input_formats.jl
index 183b067f5..7f024e110 100644
--- a/test/unit_tests/data_input_formats.jl
+++ b/test/unit_tests/data_input_formats.jl
@@ -93,8 +93,7 @@ function test_observed(
             @test @inferred(obs_mean(observed)) == dat_mean
         end
     else
-        # FIXME @inferred is broken for EM cov/mean since it may return nothing if EM was not run
-        @test @inferred(obs_mean(observed)) isa AbstractVector{Float64} broken = true # EM-based means
+        @test @inferred(obs_mean(observed)) isa AbstractVector{Float64} # EM-based means
     end
 else
     @test @inferred(obs_mean(observed)) === nothing skip = true
diff --git a/test/unit_tests/model.jl b/test/unit_tests/model.jl
index 7ed190c22..ab0a17c59 100644
--- a/test/unit_tests/model.jl
+++ b/test/unit_tests/model.jl
@@ -46,9 +46,11 @@ function test_params_api(semobj, spec::SemSpecification)
     @test @inferred(params(semobj)) == params(spec)
 end
 
-@testset "Sem(implied=$impliedtype, loss=$losstype)" for impliedtype in (RAM, RAMSymbolic),
-    losstype in (SemML, SemWLS)
-
+@testset "Sem(implied=$impliedtype, loss=$losstype)" for (impliedtype, losstype) in [
+    (RAM, SemML),
+    (RAMSymbolic, SemML),
+    (RAMSymbolic, SemWLS),
+]
     model = Sem(
         specification = ram_matrices,
         observed = obs,
@@ -66,9 +68,8 @@ end
     test_vars_api(implied(model), ram_matrices)
     test_params_api(implied(model), ram_matrices)
 
-    @test @inferred(loss(model)) isa SemLoss
-    semloss = loss(model).functions[1]
-    @test semloss isa losstype
+    @test @inferred(sem_term(model)) isa SemLoss
+    @test sem_term(model) isa losstype
 
     @test @inferred(nsamples(model)) == nsamples(obs)
 end
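############################################################################################
# Usage sketch: the term-based API the tests above migrate to
############################################################################################

# A minimal sketch, not part of the patch itself. It assumes the bundled
# `political_democracy` example data and an already-built specification object `spec`
# (hypothetical here, e.g. a RAMMatrices or ParameterTable); the constructors and the
# `sem_fit(optimizer, model)` calling convention mirror the test code in the diff above.

using StructuralEquationModels
const SEM = StructuralEquationModels

dat = example_data("political_democracy")

# keyword style: one call assembles the observed, implied, and loss parts internally
model_kw = Sem(specification = spec, data = dat, implied = RAMSymbolic, loss = SemML)

# term style: each loss term owns its observed and implied parts; passing several terms
# builds a multigroup model, and a `term => weight` pair attaches a loss weight
obs = SemObservedData(data = dat, observed_vars = SEM.observed_vars(spec))
model_terms = Sem(SemML(obs, RAMSymbolic(spec)))

# the optimizer is now passed to `sem_fit` rather than stored in the model
fit = sem_fit(SemOptimizer(engine = :Optim), model_terms)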