diff --git a/Project.toml b/Project.toml index 9a5bc091..37634708 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StructuralEquationModels" uuid = "383ca8c5-e4ff-4104-b0a9-f7b279deed53" authors = ["Maximilian Ernst", "Aaron Peikert"] -version = "0.4.0" +version = "0.4.2" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" diff --git a/README.md b/README.md index 7fc19d0f..68b7bcb7 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://structuralequationmodels.github.io/StructuralEquationModels.jl/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://structuralequationmodels.github.io/StructuralEquationModels.jl/dev/) | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Github Action CI](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/workflows/CI_extended/badge.svg)](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/actions/) [![codecov](https://codecov.io/gh/StructuralEquationModels/StructuralEquationModels.jl/branch/main/graph/badge.svg?token=P2kjzpvM4V)](https://codecov.io/gh/StructuralEquationModels/StructuralEquationModels.jl) | [![DOI](https://zenodo.org/badge/228649704.svg)](https://zenodo.org/badge/latestdoi/228649704) | > [!NOTE] -> Check out our [preprint](https://formal-methods-mpi.github.io/pkgmanuscript/manuscript.pdf) on the package! +> Check out our [preprint](https://doi.org/10.31234/osf.io/zwe8g_v1) on the package! # What is this Package for? @@ -39,7 +39,7 @@ The package makes use of - SparseArrays.jl to speed up symbolic computations. - Optim.jl and NLopt.jl to provide a range of different Optimizers/Linesearches. - ProximalAlgorithms.jl for regularization. -- FiniteDiff.jl and ForwardDiff.jl to provide gradients for user-defined loss functions. +- FiniteDiff.jl to provide gradient approximations for user-defined loss functions. # At the moment, we are still working on: - optimizing performance for big models (with hundreds of parameters) diff --git a/docs/make.jl b/docs/make.jl index 4542cf48..1bb68c4d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -52,7 +52,6 @@ makedocs( "files" => "internals/files.md", "types" => "internals/types.md", ], - "Complementary material" => ["Mathematical appendix" => "complementary/maths.md"], ], format = Documenter.HTML( prettyurls = get(ENV, "CI", nothing) == "true", diff --git a/docs/src/developer/implied.md b/docs/src/developer/implied.md index bea824a9..056cd663 100644 --- a/docs/src/developer/implied.md +++ b/docs/src/developer/implied.md @@ -62,11 +62,8 @@ model per group and an additional model with `ImpliedEmpty` and `SemRidge` for t # Extended help ## Interfaces -- `params(::RAMSymbolic) `-> Vector of parameter labels -- `nparams(::RAMSymbolic)` -> Number of parameters - -## Implementation -Subtype of `SemImplied`. +- `param_labels(::ImpliedEmpty)` -> Vector of parameter labels +- `nparams(::ImpliedEmpty)` -> Number of parameters """ struct ImpliedEmpty{A, B, C} <: SemImplied hessianeval::A @@ -78,7 +75,12 @@ end ### Constructors ############################################################################################ -function ImpliedEmpty(;specification, meanstruct = NoMeanStruct(), hessianeval = ExactHessian(), kwargs...)
+function ImpliedEmpty(; + specification, + meanstruct = NoMeanStruct(), + hessianeval = ExactHessian(), + kwargs..., +) return ImpliedEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification)) end diff --git a/docs/src/developer/loss.md b/docs/src/developer/loss.md index 931c2d0e..d6949842 100644 --- a/docs/src/developer/loss.md +++ b/docs/src/developer/loss.md @@ -204,7 +204,7 @@ julia>? help?> RAM -help?> SemObservedCommon +help?> SemObservedData ``` We see that the model implied covariance matrix can be assessed as `Σ(implied)` and the observed covariance matrix as `obs_cov(observed)`. diff --git a/docs/src/developer/observed.md b/docs/src/developer/observed.md index 240c1c34..ee795cd3 100644 --- a/docs/src/developer/observed.md +++ b/docs/src/developer/observed.md @@ -28,8 +28,8 @@ nsamples(observed::MyObserved) = ... nobserved_vars(observed::MyObserved) = ... ``` -As always, you can add additional methods for properties that implied types and loss function want to access, for example (from the `SemObservedCommon` implementation): +As always, you can add additional methods for properties that implied types and loss function want to access, for example (from the `SemObservedData` implementation): ```julia -obs_cov(observed::SemObservedCommon) = observed.obs_cov +obs_cov(observed::SemObservedData) = observed.obs_cov ``` \ No newline at end of file diff --git a/docs/src/developer/optimizer.md b/docs/src/developer/optimizer.md index a651ec63..9e01ac87 100644 --- a/docs/src/developer/optimizer.md +++ b/docs/src/developer/optimizer.md @@ -16,7 +16,7 @@ SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...) SemOptimizerName(; algorithm = LBFGS(), - options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + options = Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8), kwargs..., ) = SemOptimizerName(algorithm, options) @@ -37,13 +37,13 @@ options(optimizer::SemOptimizerName) = optimizer.options Note that your optimizer is a subtype of `SemOptimizer{:Name}`, where you can choose a `:Name` that can later be used as a keyword argument to `fit(engine = :Name)`. Similarly, `SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...)` should be defined as well as a constructor that uses only keyword arguments: -´´´julia +```julia SemOptimizerName(; algorithm = LBFGS(), - options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + options = Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8), kwargs..., ) = SemOptimizerName(algorithm, options) -´´´ +``` A method for `update_observed` and additional methods might be usefull, but are not necessary. Now comes the substantive part: We need to provide a method for `fit`: diff --git a/docs/src/internals/files.md b/docs/src/internals/files.md index 90ceceaa..4c233839 100644 --- a/docs/src/internals/files.md +++ b/docs/src/internals/files.md @@ -10,7 +10,7 @@ Source code is in the `"src"` folder: - `"StructuralEquationModels.jl"` defines the module and the exported objects - `"types.jl"` defines all abstract types and the basic type hierarchy - `"objective_gradient_hessian.jl"` contains methods for computing objective, gradient and hessian values for different model types as well as generic fallback methods -- The four folders `"observed"`, `"implied"`, `"loss"` and `"diff"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function). 
+- The folders `"observed"`, `"implied"`, and `"loss"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function). - `"optimizer"` contains connections to different optimization backends (aka methods for `fit`) - `"optim.jl"`: connection to the `Optim.jl` package - `"frontend"` contains user-facing functions diff --git a/docs/src/internals/internals.md b/docs/src/internals/internals.md index 77d1085c..18f82dbc 100644 --- a/docs/src/internals/internals.md +++ b/docs/src/internals/internals.md @@ -1,3 +1,3 @@ # Internals and Design -On the following pages, we document the internals and design of the package. Those informations are no prerequisite for extending the package (as decribed in the developer documentation)!, but they may be useful and hopefully interesting. \ No newline at end of file +On the following pages, we document some technical information about the package. This information is not a prerequisite for extending the package (as described in the developer documentation), but it may be useful. \ No newline at end of file diff --git a/docs/src/performance/simulation.md b/docs/src/performance/simulation.md index 0cb2ea25..d268853f 100644 --- a/docs/src/performance/simulation.md +++ b/docs/src/performance/simulation.md @@ -1,9 +1,5 @@ # Simulation studies -!!! note "Simulation study interface" - We are currently working on an interface for simulation studies. - Until we are finished with this, this page is just a collection of tips. - ## Replace observed data In simulation studies, a common task is fitting the same model to many different datasets. It would be a waste of resources to reconstruct the complete model for each dataset. diff --git a/docs/src/tutorials/backends/nlopt.md b/docs/src/tutorials/backends/nlopt.md index 2afa5e54..feb5c8f4 100644 --- a/docs/src/tutorials/backends/nlopt.md +++ b/docs/src/tutorials/backends/nlopt.md @@ -1,7 +1,7 @@ # Using NLopt.jl [`SemOptimizerNLopt`](@ref) implements the connection to `NLopt.jl`. -It is only available if the `NLopt` package is loaded alongside `StructuralEquationModel.jl` in the running Julia session. +It is only available if the `NLopt` package is loaded alongside `StructuralEquationModels.jl` in the running Julia session.
It takes a bunch of arguments: ```julia diff --git a/docs/src/tutorials/construction/build_by_parts.md b/docs/src/tutorials/construction/build_by_parts.md index 45d2a2ea..6b6b59ac 100644 --- a/docs/src/tutorials/construction/build_by_parts.md +++ b/docs/src/tutorials/construction/build_by_parts.md @@ -47,21 +47,21 @@ partable = ParameterTable( Now, we construct the different parts: ```@example build -# observed --------------------------------------------------------------------------------- +# observed ----------------------------------------------------------------------------- observed = SemObservedData(specification = partable, data = data) -# implied ------------------------------------------------------------------------------------ +# implied ------------------------------------------------------------------------------ implied_ram = RAM(specification = partable) -# loss ------------------------------------------------------------------------------------- +# loss --------------------------------------------------------------------------------- ml = SemML(observed = observed) loss_ml = SemLoss(ml) -# optimizer ------------------------------------------------------------------------------------- +# optimizer ---------------------------------------------------------------------------- optimizer = SemOptimizerOptim() -# model ------------------------------------------------------------------------------------ +# model -------------------------------------------------------------------------------- model_ml = Sem(observed, implied_ram, loss_ml) diff --git a/docs/src/tutorials/construction/outer_constructor.md b/docs/src/tutorials/construction/outer_constructor.md index a1c0b8ad..e2772430 100644 --- a/docs/src/tutorials/construction/outer_constructor.md +++ b/docs/src/tutorials/construction/outer_constructor.md @@ -14,9 +14,8 @@ Structural Equation Model - Loss Functions SemML - Fields - observed: SemObservedCommon - implied: RAM - optimizer: SemOptimizerOptim + observed: SemObservedData + implied: RAM ``` The output of this call tells you exactly what model you just constructed (i.e. what the loss functions, observed, implied and optimizer parts are). diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md index b1fc5c15..f743ac79 100644 --- a/docs/src/tutorials/regularization/regularization.md +++ b/docs/src/tutorials/regularization/regularization.md @@ -30,13 +30,12 @@ It can be used as ```julia SemOptimizerProximal( algorithm = ProximalAlgorithms.PANOC(), - options = Dict{Symbol, Any}(), operator_g, operator_h = nothing ) ``` -The proximal operator (aka the regularization function) can be passed as `operator_g`, available options are listed [here](https://juliafirstorder.github.io/ProximalOperators.jl/stable/functions/). +The proximal operator (aka the regularization function) can be passed as `operator_g`. The available Algorithms are listed [here](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/). ## First example - lasso diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index 5559034e..6cbcb057 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -14,84 +14,29 @@ function neumann_series(mat::SparseMatrixCSC; maxiter::Integer = size(mat, 1)) return inverse end -#= -function make_onelement_array(A) - isa(A, Array) ? 
nothing : (A = [A]) - return A -end - =# - -function semvec(observed, implied, loss, optimizer) - observed = make_onelement_array(observed) - implied = make_onelement_array(implied) - loss = make_onelement_array(loss) - optimizer = make_onelement_array(optimizer) - - #sem_vec = Array{AbstractSem}(undef, maximum(length.([observed, implied, loss, optimizer]))) - sem_vec = Sem.(observed, implied, loss, optimizer) - - return sem_vec -end - -skipmissing_mean(mat::AbstractMatrix) = - [mean(skipmissing(coldata)) for coldata in eachcol(mat)] - -function F_one_person(imp_mean, meandiff, inverse, data, logdet) - F = logdet - @. meandiff = data - imp_mean - F += dot(meandiff, inverse, meandiff) - return F -end - -function remove_all_missing(data::AbstractMatrix) - keep = Vector{Int64}() - for (i, coldata) in zip(axes(data, 1), eachrow(data)) - if any(!ismissing, coldata) - push!(keep, i) - end - end - return data[keep, :], keep -end - function batch_inv!(fun, model) for i in 1:size(fun.inverses, 1) fun.inverses[i] .= LinearAlgebra.inv!(fun.choleskys[i]) end end -#= -function batch_sym_inv_update!(fun::Union{LossFunction, DiffFunction}, model) - M_inv = inv(fun.choleskys[1]) - for i = 1:size(fun.inverses, 1) - if size(model.observed.patterns_not[i]) == 0 - fun.inverses[i] .= M_inv - else - ind_not = model.observed.patterns_not[i] - ind = model.observed.patterns[i] - - A = M_inv[ind_not, ind] - H = cholesky(M_inv[ind_not, ind_not]) - D = H \ A - out = M_inv[ind, ind] - LinearAlgebra.BLAS.gemm('T', 'N', 1.0, A, D) - fun.inverses[i] .= out - end - end -end =# - -function sparse_outer_mul!(C, A, B, ind) #computes A*S*B -> C, where ind gives the entries of S that are 1 +# computes A*S*B -> C, where ind gives the entries of S that are 1 +function sparse_outer_mul!(C, A, B, ind) fill!(C, 0.0) for i in 1:length(ind) BLAS.ger!(1.0, A[:, ind[i][1]], B[ind[i][2], :], C) end end -function sparse_outer_mul!(C, A, ind) #computes A*∇m, where ∇m ind gives the entries of ∇m that are 1 +# computes A*∇m, where ind gives the entries of ∇m that are 1 +function sparse_outer_mul!(C, A, ind) fill!(C, 0.0) @views C .= sum(A[:, ind], dims = 2) return C end -function sparse_outer_mul!(C, A, B::Vector, ind) #computes A*S*B -> C, where ind gives the entries of S that are 1 +# computes A*S*B -> C, where ind gives the entries of S that are 1 +function sparse_outer_mul!(C, A, B::Vector, ind) fill!(C, 0.0) @views @inbounds for i in 1:length(ind) C .+= B[ind[i][2]] .* A[:, ind[i][1]] diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl index 27d58f93..da3e6a90 100644 --- a/src/additional_functions/simulation.jl +++ b/src/additional_functions/simulation.jl @@ -7,7 +7,7 @@ Return a new model with swaped observed part. # Arguments - `model::AbstractSemSingle`: model to swap the observed part of. -- `kwargs`: additional keyword arguments; typically includes `data = ...` +- `kwargs`: additional keyword arguments; typically includes `data` and `specification` - `observed`: Either an object of subtype of `SemObserved` or a subtype of `SemObserved` # Examples diff --git a/src/additional_functions/start_val/start_fabin3.jl b/src/additional_functions/start_val/start_fabin3.jl index bd55f21d..ab79d9ad 100644 --- a/src/additional_functions/start_val/start_fabin3.jl +++ b/src/additional_functions/start_val/start_fabin3.jl @@ -18,7 +18,7 @@ end # SemObservedMissing function start_fabin3(observed::SemObservedMissing, implied, args...; kwargs...) if !observed.em_model.fitted - em_mvn(observed; kwargs...)
+ em_mvn!(observed; kwargs...) end return start_fabin3(implied.ram_matrices, observed.em_model.Σ, observed.em_model.μ) @@ -45,24 +45,6 @@ function start_fabin3( ) @assert length(F_var2obs) == size(F, 1) - # check in which matrix each parameter appears - - #= in_S = length.(S_ind) .!= 0 - in_A = length.(A_ind) .!= 0 - A_ind_c = [linear2cartesian(ind, (n_var, n_var)) for ind in A_ind] - in_Λ = [any(ind[2] .∈ F_ind) for ind in A_ind_c] - - if !isnothing(M) - in_M = length.(M_ind) .!= 0 - in_any = in_A .| in_S .| in_M - else - in_any = in_A .| in_S - end - - if !all(in_any) - @warn "Could not determine fabin3 starting values for some parameters, default to 0." - end =# - # set undirected parameters in S S_indices = CartesianIndices(S) for j in 1:nparams(S) @@ -79,7 +61,6 @@ function start_fabin3( # set loadings A_indices = CartesianIndices(A) - # ind_Λ = findall([is_in_Λ(ind_vec, F_ind) for ind_vec in A_ind_c]) # collect latent variable indicators in A # maps latent parameter to the vector of dependent vars diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index ad5148e3..4fbc8719 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -63,7 +63,6 @@ function start_simple( nparams(ram_matrices) start_val = zeros(n_par) - n_obs = nobserved_vars(ram_matrices) n_var = nvars(ram_matrices) C_indices = CartesianIndices((n_var, n_var)) diff --git a/src/frontend/common.jl b/src/frontend/common.jl index e89a6cf8..2734e8f2 100644 --- a/src/frontend/common.jl +++ b/src/frontend/common.jl @@ -1,7 +1,7 @@ # API methods supported by multiple SEM.jl types """ - params(semobj) -> Vector{Symbol} + params(partable::ParameterTable) -> Vector{Symbol} Return the vector of SEM model parameter identifiers. """ @@ -42,7 +42,7 @@ nlatent_vars(semobj) = length(latent_vars(semobj)) """ param_indices(semobj) -Returns a dict of parameter names and their indices in `semobj`. +Returns a dict of parameter labels and their indices in `semobj`. # Examples ```julia diff --git a/src/frontend/fit/fitmeasures/fit_measures.jl b/src/frontend/fit/fitmeasures/fit_measures.jl index 2fc4dfba..afdde173 100644 --- a/src/frontend/fit/fitmeasures/fit_measures.jl +++ b/src/frontend/fit/fitmeasures/fit_measures.jl @@ -14,6 +14,6 @@ end """ fit_measures(sem_fit, args...) -Return a default set of fit measures or the fit measures passed as `arg...`. +Return a default set of fit measures or the fit measures passed as `args...`. 
""" function fit_measures end diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl index 2cb87d79..ab4d24e5 100644 --- a/src/frontend/fit/fitmeasures/minus2ll.jl +++ b/src/frontend/fit/fitmeasures/minus2ll.jl @@ -41,7 +41,7 @@ end function minus2ll(observed::SemObservedMissing) # fit EM-based mean and cov if not yet fitted # FIXME EM could be very computationally expensive - observed.em_model.fitted || em_mvn(observed) + observed.em_model.fitted || em_mvn!(observed) Σ = observed.em_model.Σ μ = observed.em_model.μ diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index 8ee134a9..3071d565 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -15,7 +15,7 @@ function details(sem_fit::SemFit; show_fitmeasures = false, color = :light_cyan, println("Number of data samples: $(nsamples(sem_fit))") print("\n") printstyled( - "----------------------------------- Model ----------------------------------- \n"; + "----------------------------------- Model ------------------------------------ \n"; color = color, ) print("\n") @@ -23,7 +23,7 @@ function details(sem_fit::SemFit; show_fitmeasures = false, color = :light_cyan, print("\n") if show_fitmeasures printstyled( - "--------------------------------- Fitmeasures --------------------------------- \n"; + "-------------------------------- Fitmeasures --------------------------------- \n"; color = color, ) print("\n") @@ -51,7 +51,7 @@ function details( if show_variables print("\n") printstyled( - "--------------------------------- Variables --------------------------------- \n"; + "---------------------------------- Variables --------------------------------- \n"; color = color, ) print("\n") @@ -242,9 +242,6 @@ function details( ) print("\n") end - - #printstyled("""No need to copy and paste results, you can use CSV.write(DataFrame(my_partable), "myfile.csv")"""; hidden = true) - end function details( @@ -297,9 +294,6 @@ function details( show_columns = show_columns, ) end - - # printstyled("""No need to copy and paste results, you can use CSV.write(DataFrame(my_partable), "myfile.csv")"""; hidden = true) - end function check_round(vec; digits) diff --git a/src/frontend/pretty_printing.jl b/src/frontend/pretty_printing.jl index c1cd72c2..2fa970f2 100644 --- a/src/frontend/pretty_printing.jl +++ b/src/frontend/pretty_printing.jl @@ -1,3 +1,7 @@ +############################################################## +# Some helpers to implement show methods for SEM.jl objects +############################################################## + function print_field_types(io::IO, struct_instance) fields = fieldnames(typeof(struct_instance)) types = [typeof(getproperty(struct_instance, field)) for field in fields] @@ -25,7 +29,7 @@ function print_type(io::IO, struct_instance) end ############################################################## -# Loss Functions, Implied, +# Loss Function, Implied, Observed, Optimizer ############################################################## function Base.show(io::IO, struct_inst::SemLossFunction) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index 75175a87..d430e9c0 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -9,7 +9,7 @@ struct RAMMatrices <: SemSpecification F::SparseMatrixCSC{Float64} M::Union{ParamsVector{Float64}, Nothing} param_labels::Vector{Symbol} - vars::Union{Vector{Symbol}, Nothing} # better call it "variables": it's a 
mixture of observed and latent (and it gets confusing with get_vars()) + vars::Union{Vector{Symbol}, Nothing} end nparams(ram::RAMMatrices) = nparams(ram.A) diff --git a/src/frontend/specification/Sem.jl b/src/frontend/specification/Sem.jl index 7ba8f7fb..53858abd 100644 --- a/src/frontend/specification/Sem.jl +++ b/src/frontend/specification/Sem.jl @@ -45,11 +45,6 @@ Returns the [*observed*](@ref SemObserved) part of a model. """ observed(model::AbstractSemSingle) = model.observed -""" - nsamples(model::AbstractSem) -> Int - -Returns the number of samples from the [*observed*](@ref SemObserved) part of a model. -""" nsamples(model::AbstractSemSingle) = nsamples(observed(model)) """ @@ -156,11 +151,6 @@ end # pretty printing ############################################################## -#= function Base.show(io::IO, sem::Sem{O, I, L, D}) where {O, I, L, D} - lossfuntypes = @. nameof(typeof(sem.loss.functions)) - print(io, "Sem{$(nameof(O)), $(nameof(I)), $lossfuntypes, $(nameof(D))}") -end =# - function Base.show(io::IO, sem::Sem{O, I, L}) where {O, I, L} lossfuntypes = @. string(nameof(typeof(sem.loss.functions))) lossfuntypes = " " .* lossfuntypes .* ("\n") diff --git a/src/frontend/specification/StenoGraphs.jl b/src/frontend/specification/StenoGraphs.jl index 314abcc3..79e17f71 100644 --- a/src/frontend/specification/StenoGraphs.jl +++ b/src/frontend/specification/StenoGraphs.jl @@ -55,7 +55,6 @@ function ParameterTable( value_fixed = columns[:value_fixed] start = columns[:start] param_refs = columns[:label] - # group = Vector{Symbol}(undef, n) for (i, element) in enumerate(graph) edge = element isa ModifiedEdge ? element.edge : element diff --git a/src/frontend/specification/documentation.jl b/src/frontend/specification/documentation.jl index 7697a63c..e46620fb 100644 --- a/src/frontend/specification/documentation.jl +++ b/src/frontend/specification/documentation.jl @@ -1,10 +1,3 @@ -""" - param_labels(semobj) -> Vector{Symbol} - -Return the vector of parameter labels (in the same order as [`params`](@ref)). -""" -param_labels(spec::SemSpecification) = spec.param_labels - """ vars(semobj) -> Vector{Symbol} @@ -37,14 +30,22 @@ function latent_vars end latent_vars(spec::SemSpecification) = error("latent_vars(spec::$(typeof(spec))) is not implemented") +""" + param_labels(semobj) -> Vector{Symbol} + +Return the vector of parameter labels (in the same order as [`params`](@ref)). +""" +param_labels(spec::SemSpecification) = spec.param_labels + + """ `ParameterTable`s contain the specification of a structural equation model. # Constructor - (1) ParameterTable(;graph, observed_vars, latent_vars, ...) + (1) ParameterTable(graph; observed_vars, latent_vars, ...) - (2) ParameterTable(ram_matrices) + (2) ParameterTable(ram_matrices; ...) Return a `ParameterTable` constructed from (1) a graph or (2) RAM matrices. @@ -55,7 +56,7 @@ Return a `ParameterTable` constructed from (1) a graph or (2) RAM matrices. - `ram_matrices::RAMMatrices`: a `RAMMatrices` object # Examples -See the online documentation on [Model specification](@ref) and the [ParameterTable interface](@ref). +See the online documentation on [Model specification](@ref) and the [Graph interface](@ref). 
# Extended help ## Additional keyword arguments @@ -68,7 +69,7 @@ function ParameterTable end # Constructor - (1) EnsembleParameterTable(;graph, observed_vars, latent_vars, groups) + (1) EnsembleParameterTable(graph; observed_vars, latent_vars, groups) (2) EnsembleParameterTable(ps::Pair...; param_labels = nothing) @@ -91,9 +92,9 @@ function EnsembleParameterTable end # Constructor - (1) RAMMatrices(partable::ParameterTable) + (1) RAMMatrices(partable::ParameterTable; param_labels = nothing) - (2) RAMMatrices(;A, S, F, M = nothing, param_labels, vars) + (2) RAMMatrices(;A, S, F, M = nothing, param_labels, vars = nothing) (3) RAMMatrices(partable::EnsembleParameterTable) diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl index 301c455e..fd2ef61b 100644 --- a/src/implied/RAM/generic.jl +++ b/src/implied/RAM/generic.jl @@ -6,8 +6,7 @@ Model implied covariance and means via RAM notation. # Constructor - RAM(; - specification, + RAM(; specification, meanstructure = false, gradient = true, kwargs...) @@ -19,9 +18,6 @@ Model implied covariance and means via RAM notation. # Extended help -## Implementation -Subtype of `SemImplied`. - ## RAM notation The model implied covariance matrix is computed as @@ -37,33 +33,32 @@ and for models with a meanstructure, the model implied means are computed as - `param_labels(::RAM) `-> vector of parameter labels - `nparams(::RAM)` -> number of parameters -- `Σ(::RAM)` -> model implied covariance matrix -- `μ(::RAM)` -> model implied mean vector +- `ram.Σ` -> model implied covariance matrix +- `ram.μ` -> model implied mean vector RAM matrices for the current parameter values: -- `A(::RAM)` -- `S(::RAM)` -- `F(::RAM)` -- `M(::RAM)` +- `ram.A` +- `ram.S` +- `ram.F` +- `ram.M` Jacobians of RAM matrices w.r.t to the parameter vector `θ` -- `∇A(::RAM)` -> ``∂vec(A)/∂θᵀ`` -- `∇S(::RAM)` -> ``∂vec(S)/∂θᵀ`` -- `∇M(::RAM)` = ``∂M/∂θᵀ`` +- `ram.∇A` -> ``∂vec(A)/∂θᵀ`` +- `ram.∇S` -> ``∂vec(S)/∂θᵀ`` +- `ram.∇M` -> ``∂M/∂θᵀ`` Vector of indices of each parameter in the respective RAM matrix: -- `A_indices(::RAM)` -- `S_indices(::RAM)` -- `M_indices(::RAM)` +- `ram.A_indices` +- `ram.S_indices` +- `ram.M_indices` Additional interfaces -- `F⨉I_A⁻¹(::RAM)` -> ``F(I-A)^{-1}`` -- `F⨉I_A⁻¹S(::RAM)` -> ``F(I-A)^{-1}S`` -- `I_A(::RAM)` -> ``I-A`` -- `has_meanstructure(::RAM)` -> `Val{Bool}` does the model have a meanstructure? +- `ram.F⨉I_A⁻¹` -> ``F(I-A)^{-1}`` +- `ram.F⨉I_A⁻¹S` -> ``F(I-A)^{-1}S`` +- `ram.I_A` -> ``I-A`` Only available in gradient! calls: -- `I_A⁻¹(::RAM)` -> ``(I-A)^{-1}`` +- `ram.I_A⁻¹` -> ``(I-A)^{-1}`` """ mutable struct RAM{MS, A1, A2, A3, A4, A5, A6, V2, M1, M2, M3, M4, S1, S2, S3} <: SemImplied meanstruct::MS @@ -97,7 +92,6 @@ end function RAM(; specification::SemSpecification, - #vech = false, gradient_required = true, meanstructure = false, kwargs..., diff --git a/src/implied/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl index eff193c1..9634bfa8 100644 --- a/src/implied/RAM/symbolic.jl +++ b/src/implied/RAM/symbolic.jl @@ -6,7 +6,8 @@ Subtype of `SemImplied` that implements the RAM notation with symbolic precomput # Constructor - RAMSymbolic(;specification, + RAMSymbolic(; + specification, vech = false, gradient = true, hessian = false, @@ -25,32 +26,25 @@ Subtype of `SemImplied` that implements the RAM notation with symbolic precomput # Extended help -## Implementation -Subtype of `SemImplied`.
- ## Interfaces - `param_labels(::RAMSymbolic) `-> vector of parameter ids - `nparams(::RAMSymbolic)` -> number of parameters -- `Σ(::RAMSymbolic)` -> model implied covariance matrix -- `μ(::RAMSymbolic)` -> model implied mean vector +- `ram.Σ` -> model implied covariance matrix +- `ram.μ` -> model implied mean vector Jacobians (only available in gradient! calls) -- `∇Σ(::RAMSymbolic)` -> ``∂vec(Σ)/∂θᵀ`` -- `∇μ(::RAMSymbolic)` -> ``∂μ/∂θᵀ`` +- `ram.∇Σ` -> ``∂vec(Σ)/∂θᵀ`` +- `ram.∇μ` -> ``∂μ/∂θᵀ`` -- `∇Σ_function(::RAMSymbolic)` -> function to overwrite `∇Σ` in place, - i.e. `∇Σ_function(∇Σ, θ)`. Normally, you do not want to use this but simply - query `∇Σ(::RAMSymbolic)`. +- `ram.∇Σ_function` -> function to overwrite `∇Σ` in place, + i.e. `∇Σ_function(∇Σ, θ)`. Typically, you do not want to use this but simply + query `ram.∇Σ`. Hessians -The computation of hessians is more involved, and uses the "chain rule for -hessian matrices". -Therefore, we desribe it at length in the mathematical appendix of the online documentation, -and the relevant interfaces are omitted here. - -Additional interfaces -- `has_meanstructure(::RAMSymbolic)` -> `Val{Bool}` does the model have a meanstructure? +The computation of hessians is more involved. +Therefore, we describe it in the online documentation, +and the respective interfaces are omitted here. ## RAM notation The model implied covariance matrix is computed as diff --git a/src/implied/abstract.jl b/src/implied/abstract.jl index af51440c..d4868d74 100644 --- a/src/implied/abstract.jl +++ b/src/implied/abstract.jl @@ -1,4 +1,3 @@ - # vars and params API methods for SemImplied vars(implied::SemImplied) = vars(implied.ram_matrices) observed_vars(implied::SemImplied) = observed_vars(implied.ram_matrices) diff --git a/src/implied/empty.jl b/src/implied/empty.jl index 3b0292e7..82a6c946 100644 --- a/src/implied/empty.jl +++ b/src/implied/empty.jl @@ -19,11 +19,8 @@ model per group and an additional model with `ImpliedEmpty` and `SemRidge` for t # Extended help ## Interfaces -- `param_labels(::RAMSymbolic) `-> Vector of parameter labels -- `nparams(::RAMSymbolic)` -> Number of parameters - -## Implementation -Subtype of `SemImplied`. +- `param_labels(::ImpliedEmpty)` -> Vector of parameter labels +- `nparams(::ImpliedEmpty)` -> Number of parameters """ struct ImpliedEmpty{A, B, C} <: SemImplied hessianeval::A diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index ca23ded9..ea8da6b3 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -19,10 +19,6 @@ my_fiml = SemFIML(observed = my_observed, specification = my_parameter_table) # Interfaces Analytic gradients are available. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. """ mutable struct SemFIML{INV, C, L, O, M, IM, I, T, W} <: SemLossFunction hessianeval::ExactHessian diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index d14af648..ec5eb997 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -21,11 +21,8 @@ my_ml = SemML(observed = my_observed) ``` # Interfaces -Analytic gradients are available, and for models without a meanstructure, also analytic hessians. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. +Analytic gradients are available, and for models without a meanstructure +and with the `RAMSymbolic` implied type, also analytic hessians.
""" struct SemML{HE <: HessianEval, INV, M, M2} <: SemLossFunction hessianeval::HE diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index 0fe2c9b3..dd5be487 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -5,6 +5,7 @@ ############################################################################################ """ Weighted least squares estimation. +At the moment only available with the `RAMSymbolic` implied type. # Constructor @@ -32,11 +33,7 @@ my_wls = SemWLS(observed = my_observed) ``` # Interfaces -Analytic gradients are available, and for models without a meanstructure, also analytic hessians. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. +Analytic gradients are available, and for models without a meanstructure also analytic hessians. """ struct SemWLS{HE <: HessianEval, Vt, St, C} <: SemLossFunction hessianeval::HE @@ -124,7 +121,7 @@ function evaluate!( if issparse(∇σ) gradient .= (σ₋' * V * ∇σ)' else # save one allocation - mul!(gradient, σ₋' * V, ∇σ) # actually transposed, but should be fine for vectors + mul!(gradient, σ₋' * V, ∇σ) end gradient .*= -2 end diff --git a/src/loss/constant/constant.jl b/src/loss/constant/constant.jl index cb515734..3aed5e27 100644 --- a/src/loss/constant/constant.jl +++ b/src/loss/constant/constant.jl @@ -20,10 +20,6 @@ Constant loss term. Can be used for comparability to other packages. # Interfaces Analytic gradients and hessians are available. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. """ struct SemConstant{C} <: SemLossFunction hessianeval::ExactHessian diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl index aee52162..90cbcc23 100644 --- a/src/loss/regularization/ridge.jl +++ b/src/loss/regularization/ridge.jl @@ -24,10 +24,6 @@ my_ridge = SemRidge(;α_ridge = 0.02, which_ridge = [:λ₁, :λ₂, :ω₂₃], # Interfaces Analytic gradients and hessians are available. - -# Extended help -## Implementation -Subtype of `SemLossFunction`. """ struct SemRidge{P, W1, W2, GT, HT} <: SemLossFunction hessianeval::ExactHessian diff --git a/src/objective_gradient_hessian.jl b/src/objective_gradient_hessian.jl index 4aafe423..06f39329 100644 --- a/src/objective_gradient_hessian.jl +++ b/src/objective_gradient_hessian.jl @@ -95,7 +95,8 @@ function evaluate!(objective, gradient, hessian, model::AbstractSemSingle, param end ############################################################################################ -# methods for SemFiniteDiff (approximate gradient and hessian with finite differences of objective) +# methods for SemFiniteDiff +# (approximate gradient and hessian with finite differences of objective) ############################################################################################ function evaluate!(objective, gradient, hessian, model::SemFiniteDiff, params) @@ -157,14 +158,6 @@ function evaluate!(objective, gradient, hessian, ensemble::SemEnsemble, params) return objective end -# throw an error by default if gradient! and hessian! 
are not implemented - -#= gradient!(lossfun::SemLossFunction, par, model) = - throw(ArgumentError("gradient for $(typeof(lossfun).name.wrapper) is not available")) - -hessian!(lossfun::SemLossFunction, par, model) = - throw(ArgumentError("hessian for $(typeof(lossfun).name.wrapper) is not available")) =# - ############################################################################################ # Documentation ############################################################################################ diff --git a/src/observed/EM.jl b/src/observed/EM.jl index beac45ca..46d0622b 100644 --- a/src/observed/EM.jl +++ b/src/observed/EM.jl @@ -3,16 +3,32 @@ ############################################################################################ # An EM Algorithm for MVN-distributed Data with missing values -# Adapted from supplementary Material to the book Machine Learning: A Probabilistic Perspective -# Copyright (2010) Kevin Murphy and Matt Dunham -# found at https://github.com/probml/pmtk3/blob/master/toolbox/BasicModels/gauss/sub/gaussMissingFitEm.m -# and at https://github.com/probml/pmtk3/blob/master/toolbox/Algorithms/optimization/emAlgo.m +# Adapted from https://github.com/probml/pmtk3, licensed as +#= The MIT License -# what about random restarts? +Copyright (2010) Kevin Murphy and Matt Dunham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. =# # outer function --------------------------------------------------------------------------- """ - em_mvn(; + em_mvn!(; observed::SemObservedMissing, start_em = start_em_observed, max_iter_em = 100, @@ -22,7 +38,7 @@ Estimates the covariance matrix and mean vector of the normal distribution via expectation maximization for `observed`. Overwrites the statistics stored in `observed`. """ -function em_mvn( +function em_mvn!( observed::SemObservedMissing; start_em = start_em_observed, max_iter_em = 100, @@ -32,7 +48,7 @@ function em_mvn( nvars = nobserved_vars(observed) nsamps = nsamples(observed) - # preallocate stuff? + # preallocate stuff 𝔼x_pre = zeros(nvars) 𝔼xxᵀ_pre = zeros(nvars, nvars) @@ -45,9 +61,6 @@ function em_mvn( end end - # ess = 𝔼x, 𝔼xxᵀ, ismissing, missingRows, nsamps - # estepFn = (em_model, data) -> estep(em_model, data, EXsum, EXXsum, ismissing, missingRows, nsamps) - # initialize em_model = start_em(observed; kwargs...) em_model_prev = EmMVNModel(zeros(nvars, nvars), zeros(nvars), false) @@ -65,13 +78,11 @@ function em_mvn( @warn "EM Algorithm for MVN missing data did not converge. Likelihood for FIML is not interpretable. 
Maybe try passing different starting values via 'start_em = ...' " elseif iter > 1 - # done = isapprox(ll, ll_prev; rtol = rtol) done = isapprox(em_model_prev.μ, em_model.μ; rtol = rtol_em) & isapprox(em_model_prev.Σ, em_model.Σ; rtol = rtol_em) end - # print("$iter \n") iter = iter + 1 em_model_prev.μ, em_model_prev.Σ = em_model.μ, em_model.Σ end @@ -135,23 +146,7 @@ end function em_mvn_Mstep!(em_model, nsamples, 𝔼x, 𝔼xxᵀ) em_model.μ = 𝔼x / nsamples Σ = Symmetric(𝔼xxᵀ / nsamples - em_model.μ * em_model.μ') - - # ridge Σ - # while !isposdef(Σ) - # Σ += 0.5I - # end - em_model.Σ = Σ - - # diagonalization - #if !isposdef(Σ) - # print("Matrix not positive definite") - # em_model.Σ .= 0 - # em_model.Σ[diagind(em_model.Σ)] .= diag(Σ) - #else - # em_model.Σ = Σ - #end - + em_model.Σ = Σ return nothing end @@ -178,7 +173,6 @@ function start_em_simple(observed::SemObservedMissing; kwargs...) μ = zeros(nvars) Σ = rand(nvars, nvars) Σ = Σ * Σ' - # Σ = Matrix(1.0I, nvars, nvars) return EmMVNModel(Σ, μ, false) end diff --git a/src/observed/abstract.jl b/src/observed/abstract.jl index bb92ea12..cf5000e4 100644 --- a/src/observed/abstract.jl +++ b/src/observed/abstract.jl @@ -48,7 +48,7 @@ end # function to prepare input data shared by SemObserved implementations # returns tuple of -# 1) the matrix of data +# 1) the data matrix # 2) the observed variable symbols that match matrix columns # 3) the permutation of the original observed_vars (nothing if no reordering) # If observed_vars is not specified, the vars order is taken from the specification. diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl index 221ef5ca..f81fe8e5 100644 --- a/src/observed/covariance.jl +++ b/src/observed/covariance.jl @@ -9,10 +9,9 @@ const SemObservedCovariance{S} = SemObservedData{Nothing, S} SemObservedCovariance(; specification, obs_cov, - obs_colnames = nothing, - meanstructure = false, + nsamples, obs_mean = nothing, - nsamples::Integer, + observed_vars = nothing, kwargs...) Construct [`SemObserved`](@ref) without providing the observations data, but with the covariations (`obs_cov`) and the means (`obs_means`) of the observed data. Returns [`SemObservedCovariance`](@ref) object. # Arguments -- `obs_cov`: pre-computed covariations of the observed variables +- `obs_cov`: pre-computed covariance matrix of the observed variables +- `nsamples::Integer`: number of samples (observed data points) used to compute `obs_cov` and `obs_mean`, + used for calculating fit statistics - `obs_mean`: optional pre-computed means of the observed variables - `observed_vars::AbstractVector`: IDs of the observed variables (rows and columns of the `obs_cov` matrix) - `specification`: optional SEM specification ([`SemSpecification`](@ref)) -- `nsamples::Number`: number of samples (observed data points) used to compute `obs_cov` and `obs_means` - necessary for calculating fit statistics + """ function SemObservedCovariance(; obs_cov::AbstractMatrix, diff --git a/src/observed/data.jl b/src/observed/data.jl index b6ddaa43..7ba38edf 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -10,9 +10,9 @@ For observed data without missings. kwargs...) # Arguments -- `specification`: optional SEM specification ([`SemSpecification`](@ref)) - `data`: observed data -- *DataFrame* or *Matrix* - `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e.
is not a data frame) +- `specification`: optional SEM specification ([`SemSpecification`](@ref)) # Extended help ## Interfaces - `nsamples(::SemObservedData)` -> number of samples (data rows) - `nobserved_vars(::SemObservedData)` -> number of observed (manifested) variables - `samples(::SemObservedData)` -> observed data -- `obs_cov(::SemObservedData)` -> observed.obs_cov -- `obs_mean(::SemObservedData)` -> observed.obs_mean - -## Implementation -Subtype of `SemObserved` +- `obs_cov(::SemObservedData)` -> observed covariance matrix +- `obs_mean(::SemObservedData)` -> observed mean vector """ struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, S <: Number} <: SemObserved data::D @@ -48,10 +45,6 @@ function SemObservedData(; return SemObservedData(data, obs_vars, obs_cov, vec(obs_mean), size(data, 1)) end -############################################################################################ -### Recommended methods -############################################################################################ - ############################################################################################ ### additional methods ############################################################################################ diff --git a/src/observed/missing.jl b/src/observed/missing.jl index cf699252..ac8b7ea5 100644 --- a/src/observed/missing.jl +++ b/src/observed/missing.jl @@ -25,9 +25,9 @@ For observed data with missing values. kwargs...) # Arguments -- `specification`: optional SEM model specification ([`SemSpecification`](@ref)) - `data`: observed data - `observed_vars::Vector{Symbol}`: column names of the data (if the object passed as data does not have column names, i.e. is not a data frame) +- `specification`: optional SEM model specification ([`SemSpecification`](@ref)) # Extended help ## Interfaces @@ -35,10 +35,14 @@ - `nobserved_vars(::SemObservedMissing)` -> number of observed variables - `samples(::SemObservedMissing)` -> data matrix (contains both measured and missing values) -- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via expectation maximization -## Implementation -Subtype of `SemObserved` +## Expectation maximization +`em_mvn!(::SemObservedMissing)` can be called to fit a covariance matrix and mean vector to the data +using an expectation maximization (EM) algorithm under the assumption of multivariate normality. +Afterwards, the following methods are available: +- `em_model(::SemObservedMissing)` -> `EmMVNModel` that contains the covariance matrix and mean vector found via EM +- `obs_cov(::SemObservedMissing)` -> EM covariance matrix +- `obs_mean(::SemObservedMissing)` -> EM mean vector """ struct SemObservedMissing{T <: Real, S <: Real, E <: EmMVNModel} <: SemObserved data::Matrix{Union{T, Missing}} @@ -91,10 +95,6 @@ function SemObservedMissing(; ) end -############################################################################################ -### Recommended methods -############################################################################################ - ############################################################################################ ### Additional methods ############################################################################################ diff --git a/src/optimizer/Empty.jl b/src/optimizer/Empty.jl index 45a20db5..1bf0c30a 100644 --- a/src/optimizer/Empty.jl +++ b/src/optimizer/Empty.jl @@ -8,12 +8,6 @@ an optimizer part.
# Constructor SemOptimizerEmpty() - -# Extended help - -## Implementation - -Subtype of `SemOptimizer`. """ struct SemOptimizerEmpty <: SemOptimizer{:Empty} end diff --git a/src/optimizer/abstract.jl b/src/optimizer/abstract.jl index 2487b7c5..3b1e9884 100644 --- a/src/optimizer/abstract.jl +++ b/src/optimizer/abstract.jl @@ -20,11 +20,17 @@ the online documentation on [Starting values](@ref). # Examples ```julia -fit( - my_model; +fit(my_model; start_val = start_simple, start_covariances_latent = 0.5) ``` + +```julia +using Optim + +fit(my_model; + algorithm = BFGS()) +``` """ function fit(optim::SemOptimizer, model::AbstractSem; start_val = nothing, kwargs...) start_params = prepare_start_params(start_val, model; kwargs...) @@ -49,6 +55,10 @@ prepare_start_params(start_val::Nothing, model::AbstractSemSingle; kwargs...) = prepare_start_params(start_val::Nothing, model::AbstractSem; kwargs...) = start_simple(model; kwargs...) +# first argument is a function +prepare_start_params(start_val, model::AbstractSem; kwargs...) = + start_val(model; kwargs...) + function prepare_start_params(start_val::AbstractVector, model::AbstractSem; kwargs...) (length(start_val) == nparams(model)) || throw( DimensionMismatch( diff --git a/src/optimizer/optim.jl b/src/optimizer/optim.jl index 8f5404bc..bd57942d 100644 --- a/src/optimizer/optim.jl +++ b/src/optimizer/optim.jl @@ -12,11 +12,11 @@ Connects to `Optim.jl` as the optimization backend. SemOptimizerOptim(; algorithm = LBFGS(), - options = Optim.Options(;f_tol = 1e-10, x_tol = 1.5e-8), + options = Optim.Options(;f_reltol = 1e-10, x_abstol = 1.5e-8), kwargs...) # Arguments -- `algorithm`: optimization algorithm. +- `algorithm`: optimization algorithm from `Optim.jl` - `options::Optim.Options`: options for the optimization algorithm # Usage @@ -67,7 +67,7 @@ SemOptimizer{:Optim}(args...; kwargs...) = SemOptimizerOptim(args...; kwargs...) SemOptimizerOptim(; algorithm = LBFGS(), - options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + options = Optim.Options(;f_reltol = 1e-10, x_abstol = 1.5e-8), kwargs..., ) = SemOptimizerOptim(algorithm, options) @@ -110,7 +110,7 @@ function fit( upper_bounds::Union{AbstractVector, AbstractDict, Nothing} = nothing, lower_bound = -Inf, upper_bound = Inf, - variance_lower_bound::Number = 0.0, + variance_lower_bound::Number = -Inf, variance_upper_bound::Number = Inf, kwargs..., ) diff --git a/src/package_extensions/SEMNLOptExt.jl b/src/package_extensions/SEMNLOptExt.jl index 69721ac9..64c4cff0 100644 --- a/src/package_extensions/SEMNLOptExt.jl +++ b/src/package_extensions/SEMNLOptExt.jl @@ -49,10 +49,6 @@ see [Constrained optimization](@ref) in our online documentation. - `local_options(::SemOptimizerNLopt)` - `equality_constraints(::SemOptimizerNLopt)` - `inequality_constraints(::SemOptimizerNLopt)` - -## Implementation - -Subtype of `SemOptimizer`. """ struct SemOptimizerNLopt{A, A2, B, B2, C} <: SemOptimizer{:NLopt} algorithm::A diff --git a/src/package_extensions/SEMProximalOptExt.jl b/src/package_extensions/SEMProximalOptExt.jl index 5d400750..ad4c2da2 100644 --- a/src/package_extensions/SEMProximalOptExt.jl +++ b/src/package_extensions/SEMProximalOptExt.jl @@ -1,5 +1,6 @@ """ Connects to `ProximalAlgorithms.jl` as the optimization backend. +Can be used for regularized SEM, for a tutorial see the online docs on [Regularization](@ref). # Constructor @@ -11,8 +12,13 @@ Connects to `ProximalAlgorithms.jl` as the optimization backend. # Arguments - `algorithm`: optimization algorithm. 
-- `operator_g`: gradient of the objective function -- `operator_h`: optional hessian of the objective function +- `operator_g`: proximal operator (e.g., regularization penalty) +- `operator_h`: optional second proximal operator + +# Usage +All algorithms and operators from `ProximalAlgorithms.jl` are available, +for more information see the online docs on [Regularization](@ref) and +the documentation of `ProximalAlgorithms.jl` / `ProximalOperators.jl`. """ mutable struct SemOptimizerProximal{A, B, C} <: SemOptimizer{:Proximal} algorithm::A diff --git a/src/types.jl b/src/types.jl index 64a4acba..44d472eb 100644 --- a/src/types.jl +++ b/src/types.jl @@ -106,7 +106,7 @@ abstract type SemObserved end """ Supertype of all objects that can serve as the implied field of a SEM. -Computed model-implied values that should be compared with the observed data to find parameter estimates, +Computes model-implied values that should be compared with the observed data to find parameter estimates, e. g. the model implied covariance or mean. If you would like to implement a different notation, e.g. LISREL, you should implement a subtype of SemImplied. """ @@ -168,7 +168,7 @@ end # ensemble models ############################################################################################ """ - (1) SemEnsemble(models..., weights = nothing, kwargs...) + (1) SemEnsemble(models...; weights = nothing, kwargs...) (2) SemEnsemble(;specification, data, groups, column = :group, kwargs...) @@ -189,6 +189,8 @@ Returns a SemEnsemble with fields - `sems::Tuple`: `AbstractSem`s. - `weights::Vector`: Weights for each model. - `param_labels::Vector`: Stores parameter labels and their position. + +For instructions on multigroup models, see the online documentation. """ struct SemEnsemble{N, T <: Tuple, V <: AbstractVector, I} <: AbstractSemCollection n::N diff --git a/test/examples/recover_parameters/recover_parameters_twofact.jl b/test/examples/recover_parameters/recover_parameters_twofact.jl index a3e426cb..a7b4cec9 100644 --- a/test/examples/recover_parameters/recover_parameters_twofact.jl +++ b/test/examples/recover_parameters/recover_parameters_twofact.jl @@ -70,7 +70,7 @@ objective!(model_ml, true_val) optimizer = SemOptimizerOptim( BFGS(; linesearch = BackTracking(order = 3), alphaguess = InitialHagerZhang()),# m = 100), - Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8), + Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8), ) solution_ml = fit(optimizer, model_ml) diff --git a/test/unit_tests/bootstrap.jl b/test/unit_tests/bootstrap.jl deleted file mode 100644 index a2d5b683..00000000 --- a/test/unit_tests/bootstrap.jl +++ /dev/null @@ -1,5 +0,0 @@ -solution_ml = fit(model_ml) -bs = se_bootstrap(solution_ml; n_boot = 20) - -update_se_hessian!(partable, solution_ml) -update_partable!(partable, solution_ml, bs, :se_boot) diff --git a/test/unit_tests/partable_dataframe.jl b/test/unit_tests/partable_dataframe.jl deleted file mode 100644 index 7786e2ec..00000000 --- a/test/unit_tests/partable_dataframe.jl +++ /dev/null @@ -1,3 +0,0 @@ -# Convert PramaterTable to DataFrame - -# Convert EnsembleParameterTable to DataFrame diff --git a/test/unit_tests/ridge_id.jl b/test/unit_tests/ridge_id.jl deleted file mode 100644 index 13366bf0..00000000 --- a/test/unit_tests/ridge_id.jl +++ /dev/null @@ -1,8 +0,0 @@ -model_ridge_id = Sem( - specification = spec, - data = dat, - loss = (SemML, SemRidge), - α_ridge = 0.001, - which_ridge = [:x16, :x17, :x18, :x19, :x20], - optimizer = semoptimizer, -) diff --git 
a/test/unit_tests/sorting.jl b/test/unit_tests/sorting.jl deleted file mode 100644 index 3c61e13c..00000000 --- a/test/unit_tests/sorting.jl +++ /dev/null @@ -1,17 +0,0 @@ -############################################################################ -### test variables sorting -############################################################################ - -sort_vars!(partable) - -model_ml_sorted = Sem(specification = partable, data = dat) - -@testset "graph sorting" begin - @test model_ml_sorted.implied.I_A isa LowerTriangular -end - -@testset "ml_solution_sorted" begin - solution_ml_sorted = fit(model_ml_sorted) - update_estimate!(partable, solution_ml_sorted) - @test test_estimates(par_ml, partable, 0.01) -end diff --git a/test/unit_tests/start_val.jl b/test/unit_tests/start_val.jl deleted file mode 100644 index 8b137891..00000000 --- a/test/unit_tests/start_val.jl +++ /dev/null @@ -1 +0,0 @@ - diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index 88309870..7189addd 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -33,11 +33,3 @@ selected_tests = isempty(ARGS) ? collect(keys(available_tests)) : ARGS end end end - -@safetestset "SemSpecification" begin - include("specification.jl") -end - -@safetestset "Sem model" begin - include("model.jl") -end
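---

Editor's notes (not part of the diff): a few hedged usage sketches for the changes above.

The hunks in `src/observed/EM.jl` and `src/observed/missing.jl` rename the EM routine to `em_mvn!` and document access to the EM estimates afterwards. A minimal sketch, assuming a toy data matrix with made-up variable names (if `em_mvn!` is not exported, qualify it as `StructuralEquationModels.em_mvn!`):

```julia
using StructuralEquationModels

# toy data with missing entries; variable names are hypothetical
data = [1.0 2.0; missing 3.5; 2.5 missing; 3.0 4.0]
observed = SemObservedMissing(data = data, observed_vars = [:x1, :x2])

# fit mean vector and covariance matrix via EM (renamed to `em_mvn!` in this diff)
em_mvn!(observed)

# the EM estimates are stored in the observed object
em_model(observed)
```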
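The optimizer hunks replace `Optim.Options(; f_tol, x_tol)` with the `f_reltol`/`x_abstol` option names throughout. A sketch of the updated call, assuming `model` stands for any already-constructed `Sem`:

```julia
using StructuralEquationModels, Optim

# keyword constructor as documented in src/optimizer/optim.jl
optimizer = SemOptimizerOptim(
    algorithm = LBFGS(),
    options = Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8),
)

sem_fit = fit(optimizer, model)
```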
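Finally, `src/optimizer/abstract.jl` gains a `prepare_start_params` method that accepts any function as `start_val`. A sketch under that assumption (the helper `my_start` and the model `my_model` are hypothetical):

```julia
using StructuralEquationModels

# any callable of the form f(model; kwargs...) returning a parameter vector works
my_start(model; kwargs...) = fill(0.1, nparams(model))

sem_fit = fit(my_model; start_val = my_start)
```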