@@ -829,6 +829,113 @@ end
829829 x, y
830830end
831831
832+ # ### --------------------------------------------------
833+
834+ # # Issue 511 for alternatives to polynomial composition
835+ # https://ens-lyon.hal.science/ensl-00546102/document
836+ # should be faster, but not using O(nlog(n)) multiplication below.
837+ function ranged_horner (f:: StandardBasisPolynomial , g:: StandardBasisPolynomial , i, l)
838+ out = f[i+ l- 1 ] * one (g)
839+ for j ∈ (l- 2 ): - 1 : 0
840+ out = muladd (out, g, f[i+ j])
841+ end
842+ out
843+ end
844+
845+ # Compute `p(q)`. Seems more performant than
846+ function practical_polynomial_composition (f:: StandardBasisPolynomial , g:: StandardBasisPolynomial )
847+ l = 4
848+ n = degree (f)
849+
850+ kᵢ = ceil (Int, (n + 1 )/ l)
851+ isone (kᵢ) && return f (g)
852+
853+ hij = [ranged_horner (f, g, j* l, l) for j ∈ 0 : (kᵢ- 1 )]
854+
855+ G = g^ l
856+ o = 1
857+ while kᵢ > 1
858+ kᵢ = ceil (Int, kᵢ/ 2 )
859+ isodd (length (hij)) && push! (hij, zero (f))
860+ hij = [hij[2 j+ o] + hij[2 j+ 1 + o]* G for j ∈ 0 : (kᵢ- 1 )]
861+ kᵢ > 1 && (G = G^ 2 )
862+ end
863+
864+ return only (hij)
865+ end
866+
867+ # # issue #519 polynomial multiplication via FFT
868+ # # cf. http://www.cs.toronto.edu/~denisp/csc373/docs/tutorial3-adv-writeup.pdf
869+ # # Compute recursive_fft
870+ # # assumes length(as) = 2^k for some k
871+ # # ωₙ is exp(-2pi*im/n) or Cyclotomics.E(n), the latter slower but non-lossy
872+ function recursive_fft (as, ωₙ = nothing )
873+ n = length (as)
874+ N = 2 ^ ceil (Int, log2 (n))
875+ ω = something (ωₙ, exp (- 2pi * im/ N))
876+ R = typeof (ω * first (as))
877+ ys = Vector {R} (undef, N)
878+ recursive_fft! (ys, as, ω)
879+ ys
880+ end
881+
882+ # # pass in same ωₙ as recursive_fft
883+ function inverse_fft (as, ωₙ= nothing )
884+ n = length (as)
885+ ω = something (ωₙ, exp (- 2pi * im/ n))
886+ recursive_fft (as, conj (ω)) / n
887+ end
888+
889+ # note: can write version for big coefficients, but still allocates a bit
890+ function recursive_fft! (ys, as, ωₙ)
891+
892+ n = length (as)
893+ @assert n == length (ys) == 2 ^ (ceil (Int, log2 (n)))
894+
895+ ω = one (ωₙ) * one (first (as))
896+ isone (n) && (ys[1 ] = ω * as[1 ]; return ys)
897+
898+ o = 1
899+ eidx = o .+ range (0 , n- 2 , step= 2 )
900+ oidx = o .+ range (1 , n- 1 , step= 2 )
901+
902+ n2 = n ÷ 2
903+
904+ ye = recursive_fft! (view (ys, 1 : n2), view (as, eidx), ωₙ^ 2 )
905+ yo = recursive_fft! (view (ys, (n2+ 1 ): n), view (as, oidx), ωₙ^ 2 )
906+
907+ @inbounds for k ∈ o .+ range (0 , n2 - 1 )
908+ yₑ, yₒ = ye[k], yo[k]
909+ ys[k ] = muladd (ω, yₒ, yₑ)
910+ ys[k + n2] = yₑ - ω* yₒ
911+ ω *= ωₙ
912+ end
913+ return ys
914+ end
915+
916+ # This *should* be faster -- (O(nlog(n)), but this version is definitely not so.
917+ # when `ωₙ = Cyclotomics.E` and T,S are integer, this can be exact
918+ function poly_multiplication_fft (p:: P , q:: Q , ωₙ= nothing ) where {T,P<: StandardBasisPolynomial{T} ,
919+ S,Q<: StandardBasisPolynomial{S} }
920+ as, bs = coeffs0 (p), coeffs0 (q)
921+ n = maximum (length, (as, bs))
922+ N = 2 ^ ceil (Int, log2 (n))
923+
924+ as′ = zeros (promote_type (T,S), 2 N)
925+ copy! (view (as′, 1 : length (as)), as)
926+
927+ ω = something (ωₙ, n -> exp (- 2im * pi / n))(2 N)
928+ âs = recursive_fft (as′, ω)
929+
930+ as′ .= 0
931+ copy! (view (as′, 1 : length (bs)), bs)
932+ b̂s = recursive_fft (as′, ω)
933+
934+ âb̂s = âs .* b̂s
935+
936+ PP = promote_type (P,Q)
937+ ⟒ (PP)(inverse_fft (âb̂s, ω))
938+ end
832939
833940
834941# # --------------------------------------------------
0 commit comments