@@ -708,6 +708,95 @@ def test_zero_size_temporaries(ctx_factory):
708708 assert out .shape == (0 ,)
709709
710710
def test_c_vector_extensions():
    """Smoke-test code generation for the C vector-extensions target.

    Builds a three-instruction kernel that copies ``x`` into a temporary,
    computes ``2*temp1 + 1`` into a second temporary and stores that into
    ``y``.  ``j2`` is tagged ``vec``, ``j1``/``j3`` are tagged ``ilp`` and
    both temporaries get a ``vec`` array axis.  The generated device code is
    only printed; nothing is asserted about its contents.
    """
    domain = "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}"
    instructions = """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """
    kernel_args = [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)]

    t_unit = lp.make_kernel(
        domain,
        instructions,
        kernel_args,
        seq_dependencies=True,
        target=lp.CVectorExtensionsTarget())

    # Apply the iname tags first, then mark the temporaries' axes as vector
    # axes -- same order as the transformations are usually chained.
    t_unit = lp.tag_array_axes(
        lp.tag_inames(t_unit, "j2:vec, j1:ilp, j3:ilp"),
        "temp1,temp2", "vec")

    generated = lp.generate_code_v2(t_unit)
    print(generated.device_code())
727+
728+
def test_omp_simd_tag():
    """Check that an ``OpenMPSIMDTag`` iname yields a ``#pragma omp simd`` line.

    The kernel ``y[i] = 2 * x[i]`` over 16 elements is split by 4 and the
    resulting ``i_inner`` iname is tagged with :class:`lp.OpenMPSIMDTag`.
    The test passes if at least one line of the generated device code,
    ignoring surrounding whitespace, is exactly ``#pragma omp simd``.
    """
    knl = lp.make_kernel(
        "{[i]: 0<=i<16}",
        """
        y[i] = 2 * x[i]
        """)

    knl = lp.add_dtypes(knl, {"x": "float64"})
    knl = lp.split_iname(knl, "i", 4)
    knl = lp.tag_inames(knl, {"i_inner": lp.OpenMPSIMDTag()})

    code_str = lp.generate_code_v2(knl).device_code()

    # Split on real line boundaries: splitting on a newline followed by a
    # space only separates lines when the following line happens to be
    # indented, which makes the exact-match comparison below fragile.
    assert any(line.strip() == "#pragma omp simd"
               for line in code_str.splitlines())
744+
745+
def test_vec_tag_with_omp_simd_fallback():
    """Check ``VectorizeTag`` with an ``OpenMPSIMDTag`` fallback end to end.

    All three inner inames (``j1``, ``j2``, ``j3``) are tagged with
    ``lp.VectorizeTag(lp.OpenMPSIMDTag())`` and both temporaries get a
    ``vec`` array axis.  The test then

    * expects exactly two ``#pragma omp simd`` lines in the device code
      (presumably one per loop that fell back from vectorization -- which
      loops those are is not visible from this test; confirm against the
      target implementation), and
    * executes the kernel on random input and checks ``y == 2*x + 1``.
    """
    knl = lp.make_kernel(
        "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}",
        """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """,
        [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)],
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    knl = lp.tag_inames(knl, {"j1": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j2": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j3": lp.VectorizeTag(lp.OpenMPSIMDTag())})
    knl = lp.tag_array_axes(knl, "temp1,temp2", "vec")

    code_str = lp.generate_code_v2(knl).device_code()

    # Count pragma lines on real line boundaries.  Splitting on a newline
    # followed by a space can glue an unindented line onto its predecessor
    # and silently change this exact count.
    n_simd_pragmas = sum(
        1 for line in code_str.splitlines()
        if line.strip() == "#pragma omp simd")
    assert n_simd_pragmas == 2

    x = np.random.rand(10, 4)
    _, (out,) = knl(x=x)
    np.testing.assert_allclose(out, 2*x + 1)
772+
773+
def test_vec_extensions_with_multiple_loopy_body_insns():
    """Run a vectorized loop whose body holds several instructions.

    The loop over ``n`` contains two no-ops, a scalar temporary assignment
    and a store into ``dat0[n, 0]``.  ``n`` is split by 4 (with
    ``slabs=(1, 1)``) into an inner iname ``n_batch``; ``tmp`` is privatized
    over ``n_batch``, its axis tagged ``vec`` and ``n_batch`` tagged with
    ``lp.VectorizeTag(lp.OpenMPSIMDTag())``.  The kernel is executed with
    ``N=100`` and the output must equal a ``(100, 1)`` array of twos.
    """
    n_entries = 100
    batch_iname = "n_batch"

    t_unit = lp.make_kernel(
        "{[n]: 0<=n<N}",
        """
        for n
            ... nop {id=expr_start}
            <> tmp = 2.0
            dat0[n, 0] = tmp {id=expr_insn}
            ... nop {id=statement0}
        end
        """,
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    t_unit = lp.add_dtypes(t_unit, {"dat0": "float64"})
    t_unit = lp.split_iname(
        t_unit, "n", 4, slabs=(1, 1), inner_iname=batch_iname)
    t_unit = lp.privatize_temporaries_with_inames(t_unit, batch_iname)
    t_unit = lp.tag_array_axes(t_unit, "tmp", "vec")

    simd_fallback_tag = lp.VectorizeTag(lp.OpenMPSIMDTag())
    t_unit = lp.tag_inames(t_unit, {batch_iname: simd_fallback_tag})

    _, outputs = t_unit(N=n_entries)
    (result,) = outputs
    expected = 2 * np.ones((n_entries, 1))
    np.testing.assert_allclose(result, expected)
798+
799+
711800if __name__ == "__main__" :
712801 if len (sys .argv ) > 1 :
713802 exec (sys .argv [1 ])
0 commit comments