@@ -1439,111 +1439,3 @@ end
14391439# r = Ref(tuple(5.0, 2.0, 1.0, 6.0))
14401440# p = Base.unsafe_convert(Ptr{Float64}, r)
14411441# u = unsafe_wrap(Array, p, 4)
1442-
1443- # ------------------------------------------------------------------------------------------------------
1444- """
1445- bv = isoutlier(x; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0) -> BitVector or BitMatrix
1446-
1447- Return a logical array whose elements are true when an outlier is detected in the corresponding element of `x`.
1448-
1449- If `x` is a matrix, and `width` is not used, then ``isoutlier`` operates on each column of `x` separately.
1450- By default, an outlier is a value that is more than three median absolute deviations (MAD) from the median.
1451- But that can be changed via the `critic` option.
1452-
1453- - `critic`: A number that sets the threshold for detecting outliers. The default is 3.0, which means that
1454- a value is considered an outlier if it is more than 3 MAD from the median (when `method` is `:median`),
1455- or more than 3 standard deviations (when `method` is `:mean`).
1456- - `method`: The method used to calculate the threshold for outliers. It can be one of the following:
1457- - `:median`: Uses the median absolute deviation (MAD) method. Outliers are defined as elements more than
1458- `critic` MAD from the median.
1459- - `:mean`: Uses the mean and standard deviation method. Outliers are defined as elements more than `critic`
1460- standard deviations from the mean. This method is faster but less robust than "median".
1461- - `:quartiles`: Uses the interquartile range (IQR) method. Outliers are defined as elements more than 1.5
1462- interquartile ranges above the upper quartile (75 percent) or below the lower quartile (25 percent).
1463- This method is useful when the data in `x` is not normally distributed.
1464- - `threshold`: Is an alternative to the `method` option. It specifies the percentile thresholds, given as a
1465- two-element array whose elements are in the interval [0, 100]. The first element indicates the lower percentile
1466- threshold, and the second element indicates the upper percentile threshold. It can also be a single number
1467- between (0, 100), which is interpreted as the percentage of end member points that are considered outliers.
1468- For example, `threshold=1` means that the lower and upper thresholds are the 1th and 99th percentiles.
1469- - `width`: If this option is used (only when `x` is a Matrix or a GMTdataset) we detect local outliers using
1470- a moving window method with window length ``width``. This length is given in the same units as the input
1471- data stored in first column of `x`.
1472-
1473- ### Example:
1474- ```julia
1475- x = [57, 59, 60, 100, 59, 58, 57, 58, 300, 61, 62, 60, 62, 58, 57];
1476- findall(isoutlier(x))
1477- 2-element Vector{Int64}:
1478- 4
1479- 9
1480- ```
1481-
1482- ```julia
1483- x = -50.0:50; y = x / 50 .+ 3 .+ 0.25 * rand(length(x));
1484- y[[30,50,60]] = [4,-3,6]; # Add 3 outliers
1485- findall(isoutlier([x y], width=5))
1486- ```
1487- """
1488- function isoutlier (x:: AbstractVector{<:Real} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing )
1489- method in [:median , :mean , :quartiles ] || throw (ArgumentError (" Unknown method: $method . Use :median, :mean or :quartiles" ))
1490- if (threshold != = nothing )
1491- ! (isa (threshold, VecOrMat{<: Real }) || isa (threshold, Real)) &&
1492- throw (ArgumentError (" Threshold: $threshold must be a scalar or a 2 elements array." ))
1493- if (isa (threshold, VecOrMat{<: Real }))
1494- @assert ! (length (threshold) == 2 && threshold[1 ] >= 0.0 && threshold[2 ] <= 100 ) " Threshold must be a 2 elements array in the [0 100] interval"
1495- _thresh = [Float64 (threshold[1 ])/ 100 , Float64 (threshold[2 ])/ 100 ] # Make it [0 1]
1496- else
1497- @assert threshold > 0.0 && threshold < 100 " When threshold is a scalar, it must be in the ]0 100[ interval."
1498- _thresh = [Float64 (threshold)/ 100 , Float64 (100 - threshold)/ 100 ] # Make it [0 1]
1499- end
1500- method = :threshold
1501- end
1502-
1503- bv = BitVector (undef, length (x))
1504- if (method == :median )
1505- _mad, _med = mad (x); _mad *= critic
1506- @inbounds for k in eachindex (x)
1507- bv[k] = abs (x[k] - _med) > _mad
1508- end
1509- elseif (method == :mean )
1510- _mean, _std = nanmean (x), nanstd (x) * critic;
1511- @inbounds for k in eachindex (x)
1512- bv[k] = abs (x[k] - _mean) > _std
1513- end
1514- elseif (method == :threshold )
1515- q_l, q_u = quantile (x, _thresh)
1516- @inbounds for k in eachindex (x)
1517- bv[k] = (x[k] < q_l) || (x[k] > q_u)
1518- end
1519- else # method == :quartiles
1520- q25, q75 = quantile (x, [0.25 , 0.75 ])
1521- crit = (q75 - q25) * 1.5
1522- q_minus, q_plus = q25 - crit, q75 + crit
1523- @inbounds for k in eachindex (x)
1524- bv[k] = (x[k] < q_minus) || (x[k] > q_plus)
1525- end
1526- end
1527- return bv
1528- end
1529-
1530- function isoutlier (mat:: AbstractMatrix{<:Real} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing , width= 0.0 )
1531- width > 0 && return isoutlier (mat2ds (mat), critic= critic, method= method, threshold= threshold, width= width)
1532- bm = BitMatrix (undef, size (mat))
1533- for k = 1 : size (mat,2 )
1534- bm[:,k] .= isoutlier (view (mat, :, k), critic= critic, method= method, threshold= threshold)
1535- end
1536- return bm
1537- end
1538-
1539- function isoutlier (D:: GMTdataset{<:Real,2} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing , width= 0.0 )
1540- (threshold === nothing && width <= 0 ) && error (" Must provide the window length of via the `width` option or use the `threshold` option." )
1541- if (width > 0 )
1542- method in [:median , :mean ] || throw (ArgumentError (" Unknown method: $method . Here use only one of :median or :mean" ))
1543- (method == :mean ) && (method = boxcar) # :boxcar is the name in GMT for 'mean'
1544- Dres = filter1d (D, filter= (type= method, width= width, highpass= true ), E= true )
1545- isoutlier (view (Dres. data, :, 2 ), critic= critic, method= method, threshold= threshold)
1546- else
1547- isoutlier (view (D. data, :, 2 ), critic= critic, method= method, threshold= threshold)
1548- end
1549- end
0 commit comments