@@ -686,6 +686,95 @@ def test_zero_size_temporaries(ctx_factory):
686686 assert out .shape == (0 ,)
687687
688688
def test_c_vector_extensions():
    """Smoke-test code generation for ``CVectorExtensionsTarget``.

    Builds a three-instruction kernel that stages ``x`` through the
    temporaries ``temp1`` and ``temp2`` into ``y``, tags ``j2`` as ``vec``
    and ``j1``/``j3`` as ``ilp``, marks both temporaries' axes as ``vec``,
    and prints the generated device code. There is no assertion: the test
    passes as long as code generation does not raise.
    """
    kernel_args = [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)]
    c_vec_target = lp.CVectorExtensionsTarget()

    kernel = lp.make_kernel(
        "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}",
        """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """,
        kernel_args,
        seq_dependencies=True,
        target=c_vec_target)

    # Apply the iname tags first, then the array-axis tags.
    tagged = lp.tag_inames(kernel, "j2:vec, j1:ilp, j3:ilp")
    tagged = lp.tag_array_axes(tagged, "temp1,temp2", "vec")

    generated = lp.generate_code_v2(tagged)
    print(generated.device_code())
705+
706+
def test_omp_simd_tag():
    """Check that tagging an iname with ``OpenMPSIMDTag`` emits the pragma.

    A one-instruction kernel ``y[i] = 2 * x[i]`` over ``0 <= i < 16`` is
    split by 4 and the iname ``i_inner`` is tagged with
    ``lp.OpenMPSIMDTag()``. The test passes when at least one line of the
    generated device code is exactly ``#pragma omp simd`` (ignoring
    surrounding whitespace).
    """
    knl = lp.make_kernel(
        "{[i]: 0<=i<16}",
        """
        y[i] = 2 * x[i]
        """)

    knl = lp.add_dtypes(knl, {"x": "float64"})
    knl = lp.split_iname(knl, "i", 4)
    # NOTE(review): "i_inner" is presumably the inner iname produced by the
    # split above -- confirm against split_iname's naming convention.
    knl = lp.tag_inames(knl, {"i_inner": lp.OpenMPSIMDTag()})

    code_str = lp.generate_code_v2(knl).device_code()

    # Split on real line boundaries. Splitting on newline-plus-space would
    # only separate lines that happen to be indented, making the check
    # depend on the formatting of the emitted code.
    assert any(line.strip() == "#pragma omp simd"
               for line in code_str.splitlines())
722+
723+
def test_vec_tag_with_omp_simd_fallback():
    """Check ``VectorizeTag`` with an ``OpenMPSIMDTag`` fallback.

    The kernel stages ``x`` through the temporaries ``temp1`` and ``temp2``
    into ``y``. All three inner inames (``j1``, ``j2``, ``j3``) are tagged
    with ``lp.VectorizeTag(lp.OpenMPSIMDTag())`` and both temporaries' axes
    are tagged ``vec``.

    Two things are verified:

    * the generated device code contains exactly two
      ``#pragma omp simd`` lines, and
    * executing the kernel on a random ``(10, 4)`` input produces
      ``2 * x + 1``.
    """
    knl = lp.make_kernel(
        "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}",
        """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """,
        [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)],
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    knl = lp.tag_inames(knl, {"j1": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j2": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j3": lp.VectorizeTag(lp.OpenMPSIMDTag())})
    knl = lp.tag_array_axes(knl, "temp1,temp2", "vec")

    code_str = lp.generate_code_v2(knl).device_code()

    # Count pragma lines on real line boundaries. Splitting on
    # newline-plus-space would tie the count to how the emitted code
    # happens to be indented.
    # NOTE(review): presumably the two pragmas belong to the loops that
    # could not be vectorized and fell back to OpenMP SIMD -- confirm.
    n_simd_pragmas = sum(1
                         for line in code_str.splitlines()
                         if line.strip() == "#pragma omp simd")
    assert n_simd_pragmas == 2

    x = np.random.rand(10, 4)
    _, (out,) = knl(x=x)
    np.testing.assert_allclose(out, 2*x + 1)
750+
751+
def test_vec_extensions_with_multiple_loopy_body_insns():
    """Run a vectorized loop whose body holds several instructions.

    The loop over ``n`` contains two no-op instructions, a temporary
    assignment (``tmp = 2.0``) and a store into ``dat0[n, 0]``. The loop is
    split by 4 with slabs ``(1, 1)`` into the inner iname ``n_batch``,
    ``tmp`` is privatized with respect to ``n_batch`` and its axis tagged
    ``vec``, and ``n_batch`` is tagged with
    ``lp.VectorizeTag(lp.OpenMPSIMDTag())``. Executing with ``N=100`` must
    give a ``(100, 1)`` array filled with 2.
    """
    kernel = lp.make_kernel(
        "{[n]: 0<=n<N}",
        """
        for n
            ... nop {id=expr_start}
            <> tmp = 2.0
            dat0[n, 0] = tmp {id=expr_insn}
            ... nop {id=statement0}
        end
        """,
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    kernel = lp.add_dtypes(kernel, {"dat0": "float64"})
    kernel = lp.split_iname(
        kernel, "n", 4,
        slabs=(1, 1),
        inner_iname="n_batch")
    kernel = lp.privatize_temporaries_with_inames(kernel, "n_batch")
    kernel = lp.tag_array_axes(kernel, "tmp", "vec")

    simd_fallback_tag = lp.VectorizeTag(lp.OpenMPSIMDTag())
    kernel = lp.tag_inames(kernel, {"n_batch": simd_fallback_tag})

    # The kernel call returns a pair; the second entry is a one-element
    # sequence holding the output array.
    _, outputs = kernel(N=100)
    (result,) = outputs

    expected = 2 * np.ones((100, 1))
    np.testing.assert_allclose(result, expected)
776+
777+
689778if __name__ == "__main__" :
690779 if len (sys .argv ) > 1 :
691780 exec (sys .argv [1 ])
0 commit comments