@@ -865,10 +865,31 @@ function practical_polynomial_composition(f::StandardBasisPolynomial, g::Standar
865865end
866866
867867# # issue #519 polynomial multiplication via FFT
868+ # #
869+ # # This implements [Cooley-Tukey](https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm) radix-2
868870# # cf. http://www.cs.toronto.edu/~denisp/csc373/docs/tutorial3-adv-writeup.pdf
869- # # Compute recursive_fft
870- # # assumes length(as) = 2^k for some k
871- # # ωₙ is exp(-2pi*im/n) or Cyclotomics.E(n), the latter slower but non-lossy
871+ # #
872+ # # This is **much slower** than that of FFTW.jl (and more restrictive). However it does allow for exact computation
873+ # # using `Cyclotomics.jl`, or with `Mods.jl`.
874+ # # This assumes length(as) = 2^k for some k
875+ # # ωₙ is an nth root of unity, for example `exp(-2pi*im/n)` (also available with `sincos(2pi/n)`) for floating point
876+ # # or Cyclotomics.E(n), the latter much slower but non-lossy.
877+ # #
878+ # # Should implement NTT https://www.nayuki.io/page/number-theoretic-transform-integer-dft to close #519
879+
880+ struct Pad{T} <: AbstractVector{T}
881+ a:: Vector{T}
882+ n:: Int
883+ end
884+ Base. length (a:: Pad ) = a. n
885+ Base. size (a:: Pad ) = (a. n,)
886+ function Base. getindex (a:: Pad , i)
887+ u = length (a. a)
888+ i ≤ u && return a. a[i]
889+ return zero (first (a. a))
890+ end
891+
892+
872893function recursive_fft (as, ωₙ = nothing )
873894 n = length (as)
874895 N = 2 ^ ceil (Int, log2 (n))
@@ -886,7 +907,7 @@ function inverse_fft(as, ωₙ=nothing)
886907 recursive_fft (as, conj (ω)) / n
887908end
888909
889- # note: can write version for big coefficients, but still allocates a bit
910+ # note: could write version for big coefficients, but still allocates a bit
890911function recursive_fft! (ys, as, ωₙ)
891912
892913 n = length (as)
@@ -915,22 +936,19 @@ end
915936
916937# This *should* be faster -- (O(nlog(n)), but this version is definitely not so.
917938# when `ωₙ = Cyclotomics.E` and T,S are integer, this can be exact
939+ # using `FFTW.jl` over `Float64` types is much better and is
940+ # implemented in an extension
918941function poly_multiplication_fft (p:: P , q:: Q , ωₙ= nothing ) where {T,P<: StandardBasisPolynomial{T} ,
919942 S,Q<: StandardBasisPolynomial{S} }
920943 as, bs = coeffs0 (p), coeffs0 (q)
921944 n = maximum (length, (as, bs))
922945 N = 2 ^ ceil (Int, log2 (n))
923-
924- as′ = zeros (promote_type (T,S), 2 N)
925- copy! (view (as′, 1 : length (as)), as)
946+ as′ = Pad (as, 2 N)
947+ bs′ = Pad (bs, 2 N)
926948
927949 ω = something (ωₙ, n -> exp (- 2im * pi / n))(2 N)
928950 âs = recursive_fft (as′, ω)
929-
930- as′ .= 0
931- copy! (view (as′, 1 : length (bs)), bs)
932- b̂s = recursive_fft (as′, ω)
933-
951+ b̂s = recursive_fft (bs′, ω)
934952 âb̂s = âs .* b̂s
935953
936954 PP = promote_type (P,Q)
0 commit comments