Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6f554ef
Added erf(x) Float64/Float32 Julia implementation
AhmedYKadah Mar 31, 2025
7f4fd2d
changed erf to _erf, got rid of unnecessary branch
AhmedYKadah Mar 31, 2025
e784c9f
fixed syntax error in ccall
AhmedYKadah Mar 31, 2025
da16cb1
fixed syntax error in ccall 2
AhmedYKadah Mar 31, 2025
0a755b6
NaN edge case for erf(x)
AhmedYKadah Mar 31, 2025
6efcec8
added test cases for erf(x)
AhmedYKadah Mar 31, 2025
0fc6d4d
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Aug 2, 2025
3cee8ce
cleaned up erf(Float64)
AhmedYKadah Aug 2, 2025
b819c58
added erf(x::Float32) implementation
AhmedYKadah Sep 13, 2025
57bbaf2
added NaN edge case to erf(x::Float32)
AhmedYKadah Sep 13, 2025
5ad8278
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Sep 13, 2025
26b3b1f
reversed NaN check
AhmedYKadah Sep 13, 2025
693f788
cleaned up float32 polynomials, added accuracy of float32, removed un…
AhmedYKadah Sep 14, 2025
642c1de
Explicit NaN case for erf(Float64)
AhmedYKadah Sep 17, 2025
2911312
Added NaN case to erf(Float32)
AhmedYKadah Sep 17, 2025
bba5f9b
Removed unneccessary indent
AhmedYKadah Sep 17, 2025
c2a37a1
Fixed wrong NaN type
AhmedYKadah Sep 17, 2025
1d45644
added test for NaN for erf
AhmedYKadah Sep 19, 2025
6fd39f1
Merge branch 'master' into erf(x)-implementation
AhmedYKadah Nov 14, 2025
57877e0
added erfc tests
AhmedYKadah Nov 14, 2025
57bd581
added Inf erfc test
AhmedYKadah Nov 14, 2025
a0a5379
Changed interval edges to 16 bit versions for Float64
AhmedYKadah Nov 14, 2025
a9079c8
missed condition
AhmedYKadah Nov 14, 2025
462a3cf
docstring fix
AhmedYKadah Nov 14, 2025
44ae4eb
updated version
AhmedYKadah Nov 15, 2025
f7470ba
Release 2.7.0
AhmedYKadah Nov 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "SpecialFunctions"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "2.6.1"
version = "2.7.0"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Expand Down
165 changes: 160 additions & 5 deletions src/erf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ for f in (:erf, :erfc)
@eval begin
$f(x::Number) = $internalf(float(x))

$internalf(x::Float64) = ccall(($libopenlibmf, libopenlibm), Float64, (Float64,), x)
$internalf(x::Float32) = ccall(($libopenlibmf0, libopenlibm), Float32, (Float32,), x)
$internalf(x::Float16) = Float16($internalf(Float32(x)))

$internalf(z::Complex{Float64}) = Complex{Float64}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), z, zero(Float64)))
$internalf(z::Complex{Float32}) = Complex{Float32}(ccall(($openspecfunf, libopenspecfun), Complex{Float64}, (Complex{Float64}, Float64), Complex{Float64}(z), Float64(eps(Float32))))
Expand All @@ -28,6 +25,10 @@ for f in (:erf, :erfc)
end
end

_erfc(x::Float64) = ccall((:erfc, libopenlibm), Float64, (Float64,), x)
_erfc(x::Float32) = ccall((:erfcf, libopenlibm), Float32, (Float32,), x)
_erfc(x::Float16) = Float16(_erfc(Float32(x)))

for f in (:erfcx, :erfi, :dawson, :faddeeva)
internalf = Symbol(:_, f)
openspecfunfsym = Symbol(:Faddeeva_, f === :dawson ? :Dawson : f === :faddeeva ? :w : f)
Expand Down Expand Up @@ -96,11 +97,165 @@ See also:
[`erfinv(x)`](@ref erfinv), [`erfcinv(x)`](@ref erfcinv).

# Implementation by
- `Float32`/`Float64`: C standard math library
[libm](https://en.wikipedia.org/wiki/C_mathematical_functions#libm).
- `Float32`: polynomial approximations of erf
- `Float64`: Julia implementation of https://github.com/ARM-software/optimized-routines/blob/master/math/erf.c
- `BigFloat`: C library for multiple-precision floating-point [MPFR](https://www.mpfr.org/)
"""
function erf end

# Fast erf implementation using a mix of
# rational and polynomial approximations.
# Highest measured error is 0.80 ULPs at 0.827964.
function _erf(x::Float64)

# # top 16 bits
ix::UInt16=reinterpret(UInt64,x)>>48
# # top 16, without sign bit
ia::UInt16=ix & 0x7fff

if (ia < 0x3feb)
# a = |x| < 0.84375.

x2 = x * x

if (ia < 0x3fe0)
## a < 0.5 - Use polynomial approximation.

# Minimax approximation of erf of the form x*P(x^2) approximately on the interval [0;0.5]
PA=(0.12837916709551256, -0.3761263890318287, 0.11283791670896592, -0.026866170630075903, 0.005223977428649761, -0.0008548312229989974, 0.00012054654502151287, -1.4906315067498891e-5, 1.6126444349070824e-6, -1.3074259056679966e-7)

r= fma(x,evalpoly(x2,PA),x) ## This fma is crucial for accuracy.
return r
else
## 0.5 <= a < 0.84375 - Use rational approximation.

# Rational approximation on [0x1p-28, 0.84375]
NA=(0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16)
DA=(1,0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18)

P=evalpoly(x2,NA)
Q=evalpoly(x2,DA)

return fma(P / Q, x, x)
end
elseif (ia < 0x3ff4)
## 0.84375 <= |x| < 1.25.

# Rational approximation on [0.84375, 1.25]
NB=( -0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2, 0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5, -0x1.1bf380a96073fp-9 )
DB=( 1, 0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4, 0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7 )

C = 0x1.b0ac16p-1

a = abs(x) - 1.0

P=evalpoly(a,NB)
Q=evalpoly(a,DB)

r= C + P / Q
return copysign(r,x)

elseif (ia < 0x4000)
## 1.25 <= |x| < 2.0.
a = abs(x)
a = a - 1.25

# erfc polynomial approximation
# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25
PC=( 0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, -0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, -0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, -0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, 0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, -0x1.578c9e375d37p-19)

# Obtains erfc of |x|
r=1.0-evalpoly(a,PC)
return copysign(r,x)

elseif (ia < 0x400a)
## 2 <= |x| < 3.25.
a = abs(x)
a = fma(0.5, a, -1.0)

# Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2
PD=( 0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, -0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, -0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, 0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, -0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, 0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 )

# Obtains erfc of |x|
r=1.0-evalpoly(a,PD)
return copysign(r,x)

elseif (ia < 0x4012)
## 3.25 <= |x| < 4.5.
a = abs(x)
a = a - 3.25

# Generated using Sollya::remez(erfc(x+a),15,[0;b-a],1,1e-32) with a=3.25 b=4.5
PE=(4.302779463707753e-6, -2.9189025396754986e-5, 9.486433337913454e-5, -0.00019580973528519273, 0.00028656966098267845, -0.0003137999213284758, 0.00026354316698539023, -0.00017004596013480086, 8.179009099315484e-5, -2.6177001557562883e-5, 2.6667235974837568e-6, 2.5840073872012914e-6, -1.7970865794807476e-6, 6.040008157462737e-7, -1.1384065360475506e-7, 9.657905491660664e-9)

r=1.0-evalpoly(a,PE)
return copysign(r,x)

elseif (ia < 0x4018)
## 4.5 <= |x| < 6.0.
a = abs(x)
a = a - 4.5

# Generated using Sollya::remez(erfc(x+a),14,[0;b-a],1,1e-32) with a=4.5 b=6.0
PF=(1.966160232770269e-10, -1.8112993751963606e-9, 8.150538393058677e-9, -2.3841928179141717e-8, 5.086831026559613e-8, -8.405611838790292e-8, 1.1114551347025387e-7, -1.1927287548359634e-7, 1.0374501165951575e-7, -7.205498311583407e-8, 3.885921643476988e-8, -1.558946100318052e-8, 4.352049759187871e-9, -7.505506222192582e-10, 5.996753601814976e-11)

r=1.0-evalpoly(a,PF)
return copysign(r,x)
elseif isnan(x)
return NaN
else
return copysign(1.0,x)
end
end

# Fast erf implementation using
# polynomial approximations of erf and erfc.
# Highest measured error is 1.12 ULPs at x = 1.232469
function _erf(x::Float32)
xabs=abs(x)

if (xabs< 0.5)
# range [0;0.5]
# # erf approximation using erf(x)=x+x*P(x^2) with degree 6
# Sollya::remez(erf(sqrt(x))/sqrt(x)-1,6,[1e-32;0.5],1,1e-32);

p=(0.12837917f0, -0.37612638f0, 0.11283784f0, -0.026865287f0, 0.005218856f0, -0.0008394848f0)
return copysign(fma(evalpoly(x^2,p),x,x),x)

elseif(xabs<1.25)
# range [0.5;1.25]
# # erfc approximation with degree 11
# Sollya::remez(erfc(x+0.5),11,[0;1.25-0.5],1,1e-32);

p=(0.47950011f0, -0.8787826f0, 0.4393913f0, 0.14646415f0, -0.18308467f0, -0.007286422f0, 0.04987047f0, -0.0048868246f0, -0.011067663f0, 0.003422347f0, 0.00073027064f0, -0.0003758171f0)
return copysign(1f0-evalpoly(xabs-0.5f0,p),x)

elseif(xabs<2.5)
# range [1.25;2.5]
# erfc approximation with degree 13
# Sollya::remez(erfc(x+1.25),13,[1.25-1.25;2.5-1.25],1,1e-32);

p=(0.077099875f0, -0.23652112f0, 0.2956514f0, -0.16753574f0, 0.006158593f0, 0.04718712f0, -0.021331023f0, -0.0035262543f0, 0.005461831f0, -0.00047858673f0, -0.0012763853f0, 0.00073386944f0, -0.00017831658f0, 1.7451624f-5)
return copysign(1f0-evalpoly(xabs-1.25f0,p),x)

elseif (xabs<4.0)
# range [2.5;4.0]
# erfc approximation with degree 13
# Sollya::remez(erfc(x+2.5),13,[0;4-2.5],1,1e-32);

p=(0.00040695202f0, -0.002178284f0, 0.0054457085f0, -0.008350053f0, 0.008622011f0, -0.006115167f0, 0.0027899458f0, -0.000519395f0, -0.00030461047f0, 0.00031068458f0, -0.00013866898f0, 3.6909692f-5, -5.682889f-6, 3.929763f-7)
return copysign(1f0-evalpoly(xabs-2.5f0,p),x)

elseif isnan(x)
return NaN32
else
# range [4.0,inf)
return copysign(1f0, x)
end
end

_erf(x::Float16)=Float16(_erf(Float32(x)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you wanted to do a Float16 impl, it should be easier than the others. Specifically, the domain is only to 2, and the accuracy required is much reduced.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% could wait for a followup PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking that too to be honest.
this and the poli regen.


"""
erf(x, y)

Expand Down
39 changes: 38 additions & 1 deletion test/erf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,47 @@
@testset "real argument" begin
for T in (Float16, Float32, Float64)
@test @inferred(erf(T(1))) isa T
@test erf(T(0.25)) ≈ T(0.27632639016823696) rtol=2*eps(T)
@test erf(T(0.75)) ≈ T(0.7111556336535151) rtol=2*eps(T)
@test erf(T(1)) ≈ T(0.84270079294971486934) rtol=2*eps(T)
@test erf(T(1.5)) ≈ T(0.9661051464753108) rtol=2*eps(T)
@test erf(T(2.5)) ≈ T(0.9995930479825551) rtol=2*eps(T)
@test erf(T(3.5)) ≈ T(0.9999992569016276) rtol=2*eps(T)
@test erf(T(4.5)) ≈ T(0.9999999998033839) rtol=2*eps(T)
@test erf(T(6)) ≈ T(1.0) rtol=2*eps(T)

@test erf(T(-0.25)) ≈ T(-0.27632639016823696) rtol=2*eps(T)
@test erf(T(-0.75)) ≈ T(-0.7111556336535151) rtol=2*eps(T)
@test erf(T(-1)) ≈ T(-0.84270079294971486934) rtol=2*eps(T)
@test erf(T(-1.5)) ≈ T(-0.9661051464753108) rtol=2*eps(T)
@test erf(T(-2.5)) ≈ T(-0.9995930479825551) rtol=2*eps(T)
@test erf(T(-3.5)) ≈ T(-0.9999992569016276) rtol=2*eps(T)
@test erf(T(-4.5)) ≈ T(-0.9999999998033839) rtol=2*eps(T)
@test erf(T(-6)) ≈ T(-1.0) rtol=2*eps(T)

@test isnan(erf(T(NaN)))

@test @inferred(erfc(T(1))) isa T
@test erfc(T(1)) ≈ T(0.15729920705028513066) rtol=2*eps(T)
@test erfc(T(0.25)) ≈ T(0.7236736098317631) rtol=2*eps(T)
@test erfc(T(0.75)) ≈ T(0.28884436634648486) rtol=2*eps(T)
@test erfc(T(1.0)) ≈ T(0.15729920705028513) rtol=2*eps(T)
@test erfc(T(2.0)) ≈ T(0.004677734981047266) rtol=2*eps(T)
@test erfc(T(3.0)) ≈ T(2.209049699858544e-5) rtol=2*eps(T)
@test erfc(T(4.0)) ≈ T(1.541725790028002e-8) rtol=2*eps(T)
@test erfc(T(5.0)) ≈ T(1.537459794428035e-12) rtol=2*eps(T)
@test erfc(T(6.0)) ≈ T(2.1519736712498913e-17) rtol=2*eps(T)
@test erfc(T(Inf)) ≈ T(0.0) rtol=2*eps(T)

@test erfc(T(-0.25)) ≈ T(1.276326390168237) rtol=2*eps(T)
@test erfc(T(-0.75)) ≈ T(1.7111556336535152) rtol=2*eps(T)
@test erfc(T(-1.0)) ≈ T(1.8427007929497148) rtol=2*eps(T)
@test erfc(T(-1.5)) ≈ T(1.9661051464753108) rtol=2*eps(T)
@test erfc(T(-2.5)) ≈ T(1.999593047982555) rtol=2*eps(T)
@test erfc(T(-3.5)) ≈ T(1.9999992569016276) rtol=2*eps(T)
@test erfc(T(-4.5)) ≈ T(1.999999999803384) rtol=2*eps(T)
@test erfc(T(-6.0)) ≈ T(2) rtol=2*eps(T)

@test isnan(erfc(T(NaN)))

@test @inferred(erfcx(T(1))) isa T
@test erfcx(T(1)) ≈ T(0.42758357615580700442) rtol=2*eps(T)
Expand Down