@@ -15,71 +15,130 @@ function __init__()
1515 end
1616end
1717
18+ # # List of all wrapped functions
19+ # Unary functions defined for both real (Float32, Float64) and complex (ComplexF32, ComplexF64) element types
20+ unary_real_complex = ( # each entry is unpacked as (f, f!, f_mkl) and passed to def_unary_op
21+ (:acos , :acos! , :Acos ),
22+ (:asin , :asin! , :Asin ),
23+ (:acosh , :acosh! , :Acosh ),
24+ (:asinh , :asinh! , :Asinh ),
25+ (:sqrt , :sqrt! , :Sqrt ),
26+ (:exp , :exp! , :Exp ),
27+ (:log , :log! , :Ln ),
28+ )
29+
30+ binary_real_complex = ( # each entry is unpacked as (f, f!, f_mkl, broadcast) and passed to def_binary_op
31+ (:pow , :pow! , :Pow , true ),
32+ (:divide , :divide! , :Div , true ),
33+ )
34+
35+ # Unary functions defined only for real element types (Float32, Float64)
36+ unary_real = (
37+ (:cbrt , :cbrt! , :Cbrt ),
38+ (:expm1 , :expm1! , :Expm1 ),
39+ (:log1p , :log1p! , :Log1p ),
40+ (:log2 , :log2! , :Log2 ),
41+ (:abs , :abs! , :Abs ),
42+ (:abs2 , :abs2! , :Sqr ),
43+ (:ceil , :ceil! , :Ceil ),
44+ (:floor , :floor! , :Floor ),
45+ (:round , :round! , :Round ),
46+ (:trunc , :trunc! , :Trunc ),
47+ (:cospi , :cospi! , :Cospi ),
48+ (:sinpi , :sinpi! , :Sinpi ),
49+ (:tanpi , :tanpi! , :Tanpi ),
50+ (:acospi , :acospi! , :Acospi ),
51+ (:asinpi , :asinpi! , :Asinpi ),
52+ (:atanpi , :atanpi! , :Atanpi ),
53+ (:cosd , :cosd! , :Cosd ),
54+ (:sind , :sind! , :Sind ),
55+ (:tand , :tand! , :Tand ),
56+ # Enabled only for real element types. MKL guarantees higher accuracy, but at a
57+ # substantial performance cost.
58+ (:atan , :atan! , :Atan ),
59+ (:cos , :cos! , :Cos ),
60+ (:sin , :sin! , :Sin ),
61+ (:tan , :tan! , :Tan ),
62+ (:atanh , :atanh! , :Atanh ),
63+ (:cosh , :cosh! , :Cosh ),
64+ (:sinh , :sinh! , :Sinh ),
65+ (:tanh , :tanh! , :Tanh ),
66+ (:log10 , :log10! , :Log10 ),
67+ # These now live in SpecialFunctions (TODO: maybe handle this more smartly)
68+ (:erf , :erf! , :Erf ),
69+ (:erfc , :erfc! , :Erfc ),
70+ (:erfinv , :erfinv! , :ErfInv ),
71+ (:erfcinv , :erfcinv! , :ErfcInv ),
72+ (:lgamma , :lgamma! , :LGamma ),
73+ (:gamma , :gamma! , :TGamma ),
74+ # No equivalent in Base
75+ (:inv_cbrt , :inv_cbrt! , :InvCbrt ),
76+ (:inv_sqrt , :inv_sqrt! , :InvSqrt ),
77+ (:pow2o3 , :pow2o3! , :Pow2o3 ),
78+ (:pow3o2 , :pow3o2! , :Pow3o2 ),
79+ )
80+
81+ binary_real = ( # real-only binary functions; each entry is unpacked as (f, f!, f_mkl, broadcast)
82+ (:atan , :atan! , :Atan2 , false ),
83+ (:hypot , :hypot! , :Hypot , false ),
84+ # No equivalent in Base
85+ (:atanpi , :atanpi! , :Atan2pi , false ),
86+ )
87+
88+ unary_complex_in = ( # registered as def_unary_op(Complex{t}, t, ...): presumably complex input, real output
89+ (:abs , :abs! , :Abs ),
90+ (:angle , :angle! , :Arg ),
91+ )
92+
93+ unary_complex_inout = ( # registered as def_unary_op(Complex{t}, Complex{t}, ...): complex type in both slots
94+ (:conj , :conj! , :Conj ),
95+ )
96+
97+ # # Define the functions listed above for all eligible input types
98+
1899for t in (Float32, Float64, ComplexF32, ComplexF64) # t fills both type arguments of def_unary_op / def_binary_op below
19100 # Unary ops defined for real and complex element types alike
20- def_unary_op (t, t, :acos , :acos! , :Acos )
21- def_unary_op (t, t, :asin , :asin! , :Asin )
22- def_unary_op (t, t, :acosh , :acosh! , :Acosh )
23- def_unary_op (t, t, :asinh , :asinh! , :Asinh )
24- def_unary_op (t, t, :sqrt , :sqrt! , :Sqrt )
25- def_unary_op (t, t, :exp , :exp! , :Exp )
26- def_unary_op (t, t, :log , :log! , :Ln )
101+ for (f, f!, f_mkl) in unary_real_complex
102+ def_unary_op (t, t, f, f!, f_mkl)
103+ end
27104
28105 # Binary ops defined for real and complex element types alike
29- def_binary_op (t, t, :pow , :pow! , :Pow , true )
30- def_binary_op (t, t, :divide , :divide! , :Div , true )
106+ for (f, f!, f_mkl, broadcast) in binary_real_complex
107+ def_binary_op (t, t, f, f!, f_mkl, broadcast)
108+ end
31109end
32110
33111for t in (Float32, Float64)
34- # Unary, real-only
35- def_unary_op (t, t, :cbrt , :cbrt! , :Cbrt )
36- def_unary_op (t, t, :expm1 , :expm1! , :Expm1 )
37- def_unary_op (t, t, :log1p , :log1p! , :Log1p )
38- def_unary_op (t, t, :log2 , :log2! , :Log2 )
39- def_unary_op (t, t, :abs , :abs! , :Abs )
40- def_unary_op (t, t, :abs2 , :abs2! , :Sqr )
41- def_unary_op (t, t, :ceil , :ceil! , :Ceil )
42- def_unary_op (t, t, :floor , :floor! , :Floor )
43- def_unary_op (t, t, :round , :round! , :Round )
44- def_unary_op (t, t, :trunc , :trunc! , :Trunc )
112+ # Unary, real only
113+ for (f, f!, f_mkl) in unary_real
114+ def_unary_op (t, t, f, f!, f_mkl)
115+ end
45116
46- # Enabled only for Real. MKL guarantees higher accuracy, but at a
47- # substantial performance cost.
48- def_unary_op (t, t, :atan , :atan! , :Atan )
49- def_unary_op (t, t, :cos , :cos! , :Cos )
50- def_unary_op (t, t, :sin , :sin! , :Sin )
51- def_unary_op (t, t, :tan , :tan! , :Tan )
52- def_unary_op (t, t, :atanh , :atanh! , :Atanh )
53- def_unary_op (t, t, :cosh , :cosh! , :Cosh )
54- def_unary_op (t, t, :sinh , :sinh! , :Sinh )
55- def_unary_op (t, t, :tanh , :tanh! , :Tanh )
56- def_unary_op (t, t, :log10 , :log10! , :Log10 )
57-
58- # Unary, real-only
59- def_unary_op (t, t, :cospi , :cospi! , :Cospi )
60- def_unary_op (t, t, :sinpi , :sinpi! , :Sinpi )
61- def_unary_op (t, t, :tanpi , :tanpi! , :Tanpi )
62- def_unary_op (t, t, :acospi , :acospi! , :Acospi )
63- def_unary_op (t, t, :asinpi , :asinpi! , :Asinpi )
64- def_unary_op (t, t, :atanpi , :atanpi! , :Atanpi )
65- def_unary_op (t, t, :cosd , :cosd! , :Cosd )
66- def_unary_op (t, t, :sind , :sind! , :Sind )
67- def_unary_op (t, t, :tand , :tand! , :Tand )
117+ for (f, f!, f_mkl, broadcast) in binary_real
118+ def_binary_op (t, t, f, f!, f_mkl, broadcast)
119+ end
120+
121+ # Unary, complex-only
122+ for (f, f!, f_mkl) in unary_complex_inout
123+ def_unary_op (Complex{t}, Complex{t}, f, f!, f_mkl)
124+ end
125+ for (f, f!, f_mkl) in unary_complex_in
126+ def_unary_op (Complex{t}, t, f, f!, f_mkl)
127+ end
128+
129+ # ## cis is special: the IntelVectorMath function is selected by the output type
130+ def_unary_op (t, Complex{t}, :cis , :cis! , :CIS ; vmltype= Complex{t})
68131
69132 def_one2two_op (t, t, :sincos , :sincos! , :SinCos )
70133
71- # now in SpecialFunctions (make smart, maybe?)
72- def_unary_op (t, t, :erf , :erf! , :Erf )
73- def_unary_op (t, t, :erfc , :erfc! , :Erfc )
74- def_unary_op (t, t, :erfinv , :erfinv! , :ErfInv )
75- def_unary_op (t, t, :erfcinv , :erfcinv! , :ErfcInv )
76- def_unary_op (t, t, :lgamma , :lgamma! , :LGamma )
77- def_unary_op (t, t, :gamma , :gamma! , :TGamma )
78- # Not in Base
79- def_unary_op (t, t, :inv_cbrt , :inv_cbrt! , :InvCbrt )
80- def_unary_op (t, t, :inv_sqrt , :inv_sqrt! , :InvSqrt )
81- def_unary_op (t, t, :pow2o3 , :pow2o3! , :Pow2o3 )
82- def_unary_op (t, t, :pow3o2 , :pow3o2! , :Pow3o2 )
134+ # Binary, complex-only. These are more accurate but performance is
135+ # either equivalent to Base or slower.
136+ # def_binary_op(Complex{t}, Complex{t}, (:+), :add!, :Add, false)
137+ # def_binary_op(Complex{t}, Complex{t}, (:.+), :add!, :Add, true)
138+ # def_binary_op(Complex{t}, Complex{t}, (:.*), :multiply!, :Mul, true)
139+ # def_binary_op(Complex{t}, Complex{t}, (:-), :subtract!, :Sub, false)
140+ # def_binary_op(Complex{t}, Complex{t}, (:.-), :subtract!, :Sub, true)
141+ # def_binary_op(Complex{t}, Complex{t}, :multiply_conj, :multiply_conj!, :Mul, false)
83142
84143 # # .^ to scalar power
85144 # mklfn = Base.Meta.quot(Symbol("$(vml_prefix(t))Powx"))
@@ -98,28 +157,6 @@ for t in (Float32, Float64)
98157 # out
99158 # end
100159 # end
101-
102- # # Binary, real-only
103- def_binary_op (t, t, :atan , :atan! , :Atan2 , false )
104- def_binary_op (t, t, :atanpi , :atanpi! , :Atan2pi , false )
105- def_binary_op (t, t, :hypot , :hypot! , :Hypot , false )
106-
107- # Unary, complex-only
108- def_unary_op (Complex{t}, Complex{t}, :conj , :conj! , :Conj )
109- def_unary_op (Complex{t}, t, :abs , :abs! , :Abs )
110- def_unary_op (Complex{t}, t, :angle , :angle! , :Arg )
111-
112- # ## cis is special, IntelVectorMath function is based on output
113- def_unary_op (t, Complex{t}, :cis , :cis! , :CIS ; vmltype = Complex{t})
114-
115- # Binary, complex-only. These are more accurate but performance is
116- # either equivalent to Base or slower.
117- # def_binary_op(Complex{t}, Complex{t}, (:+), :add!, :Add, false)
118- # def_binary_op(Complex{t}, Complex{t}, (:.+), :add!, :Add, true)
119- # def_binary_op(Complex{t}, Complex{t}, (:.*), :multiply!, :Mul, true)
120- # def_binary_op(Complex{t}, Complex{t}, (:-), :subtract!, :Sub, false)
121- # def_binary_op(Complex{t}, Complex{t}, (:.-), :subtract!, :Sub, true)
122- # def_binary_op(Complex{t}, Complex{t}, :multiply_conj, :multiply_conj!, :Mul, false)
123160end
124161
125162export VML_LA, VML_HA, VML_EP, vml_set_accuracy, vml_get_accuracy
0 commit comments