@@ -458,6 +458,16 @@ unittest
458458 assert (generateShuffles2! (4 , 4 ) == [[0 , 1 , 2 , 3 ], [4 , 5 , 6 , 7 ]]);
459459}
460460
// Checks `shuffle3!1` on two 4-element double arrays: per the expected values
// below, `c` receives the interleaved first halves of `a` and `b`, and `d`
// receives the interleaved second halves.
unittest
{
    // `align(32)` applies to the whole declaration, i.e. to a, b, c and d.
    // NOTE(review): presumably matches an alignment expectation inside
    // `shuffle3` - confirm against its implementation.
    align(32)
    double[4] a = [0, 1, 2, 3], b = [4, 5, 6, 7], c, d;
    shuffle3!1(a, b, c, d);
    assert([c, d] == [[0.0, 4, 1, 5], [2.0, 6, 3, 7]]);
}
470+
461471unittest
462472{
463473 enum ai = [0 , 1 , 2 , 3 ];
@@ -490,78 +500,78 @@ import mir.internal.utility;
490500
/++
Applies `kernel` element by element to four fixed-size input arrays and stores
the result(s) in `c`.

For every index `i` in `0 .. N` the kernel is invoked as
`kernel(a0[i], b0[i], a1[i], b1[i])`. When `R == 1` the returned value is
stored in `c[0][i]`; otherwise the returned value is indexed and `r[j]` is
stored in `c[j][i]` for every `j` in `0 .. R`.

Params:
    kernel = callable taking four arguments; for `R > 1` its result must be
             indexable with `0 .. R`
    a0 = first input array
    b0 = second input array
    a1 = third input array
    b1 = fourth input array
    c = `R` output rows of `N` elements each

The function has no `return` statement in the active path, so its inferred
return type is `void`.
+/
auto vectorize(Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a0, ref F[N] b0, ref F[N] a1, ref F[N] b1, ref F[N][R] c)
{
    // Compile-time switch for the SIMD code paths. They are currently turned
    // off, so only the scalar fallback below is instantiated. Keeping them
    // behind `static if` (instead of commenting them out) means they are still
    // syntax-checked and can be re-enabled by flipping this single flag.
    enum bool useSimd = false;

    static if (useSimd && LDC && F.mant_dig != 64)
    {
        alias V = __vector(F[N]); // @FUTURE@ vector support
        *cast(V[R]*) c.ptr = kernel(
            *cast(V*)a0.ptr, *cast(V*)b0.ptr,
            *cast(V*)a1.ptr, *cast(V*)b1.ptr);
    }
    else
    static if (useSimd && F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof)
    {
        import mir.utility;
        enum S = _avx ? 32u : 16u;
        enum M = min(S, F[N].sizeof) / F.sizeof;
        alias V = __vector(F[M]); // @FUTURE@ vector support
        enum C = N / M;
        foreach(i; Iota!C)
        {
            auto r = kernel(
                *cast(V*)(a0.ptr + i * M), *cast(V*)(b0.ptr + i * M),
                *cast(V*)(a1.ptr + i * M), *cast(V*)(b1.ptr + i * M),
                );
            static if (R == 1)
                *cast(V*)(c[0].ptr + i * M) = r;
            else
            foreach(j; Iota!R)
                *cast(V*)(c[j].ptr + i * M) = r[j];
        }
    }
    else
    {
        // Temporary array in case "c" overlaps any of the inputs: results are
        // staged here and copied out only after every element has been read,
        // mirroring the two-input overload of `vectorize`.
        F[N][R] _c = void;
        foreach(i; Iota!N)
        {
            auto r = kernel(a0[i], b0[i], a1[i], b1[i]);
            static if (R == 1)
                _c[0][i] = r; // one scalar per element, not a whole-row assignment
            else
            foreach(j; Iota!R)
                _c[j][i] = r[j];
        }
        c = _c;
    }
}
534544
535545auto vectorize (Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a, ref F[N] b, ref F[N][R] c)
536546{
537- static if (LDC && F.mant_dig != 64 && is (__vector (F[N])))
538- {
539- alias V = __vector (F[N]); // @FUTURE@ vector support
540- * cast (V[R]* ) c.ptr = kernel(* cast (V* )a.ptr, * cast (V* )b.ptr);
541- }
542- else
543- static if (F.sizeof <= double .sizeof && F[N].sizeof >= (double [2 ]).sizeof && x86_64)
544- {
545- import mir.utility;
546- enum S = _avx ? 32u : 16u ;
547- enum M = min(S, F[N].sizeof) / F.sizeof;
548- alias V = __vector (F[M]); // @FUTURE@ vector support
549- enum C = N / M;
550- foreach (i; Iota! C)
551- {
552- auto r = kernel(
553- * cast (V* )(a.ptr + i * M),
554- * cast (V* )(b.ptr + i * M),
555- );
556- static if (R == 1 )
557- * cast (V* )(c[0 ].ptr + i * M) = r;
558- else
559- foreach (j; Iota! R)
560- * cast (V* )(c[j].ptr + i * M) = r[j];
561- }
562- }
563- else
564- {
547+ // static if (LDC && F.mant_dig != 64 && is(__vector(F[N])))
548+ // {
549+ // alias V = __vector(F[N]); // @FUTURE@ vector support
550+ // *cast(V[R]*) c.ptr = kernel(*cast(V*)a.ptr, *cast(V*)b.ptr);
551+ // }
552+ // else
553+ // static if (F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof && x86_64)
554+ // {
555+ // import mir.utility;
556+ // enum S = _avx ? 32u : 16u;
557+ // enum M = min(S, F[N].sizeof) / F.sizeof;
558+ // alias V = __vector(F[M]); // @FUTURE@ vector support
559+ // enum C = N / M;
560+ // foreach(i; Iota!C)
561+ // {
562+ // auto r = kernel(
563+ // *cast(V*)(a.ptr + i * M),
564+ // *cast(V*)(b.ptr + i * M),
565+ // );
566+ // static if (R == 1)
567+ // *cast(V*)(c[0].ptr + i * M) = r;
568+ // else
569+ // foreach(j; Iota!R)
570+ // *cast(V*)(c[j].ptr + i * M) = r[j];
571+ // }
572+ // }
573+ // else
574+ // {
565575 F[N][R] _c = void ;// Temporary array in case "c" overlaps "a" and/or "b".
566576 foreach (i; Iota! N)
567577 {
@@ -573,7 +583,7 @@ auto vectorize(Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a, ref
573583 _c[j][i] = r[j];
574584 }
575585 c = _c;
576- }
586+ // }
577587}
578588
579589// version(unittest)
0 commit comments