@@ -719,6 +719,95 @@ def test_empty_array_stride_check_fortran(ctx_factory):
719719 knl (queue , input = a_f )
720720
721721
def test_c_vector_extensions():
    """Smoke-test code generation for ``lp.CVectorExtensionsTarget``.

    Builds a three-instruction kernel, tags ``j2`` as ``vec`` and ``j1``/``j3``
    as ``ilp``, tags the axes of both temporaries as ``vec``, and prints the
    generated device code. There are no assertions: the test passes as long
    as code generation does not raise.
    """
    kernel = lp.make_kernel(
        "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}",
        """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """,
        [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)],
        seq_dependencies=True,
        target=lp.CVectorExtensionsTarget())

    kernel = lp.tag_inames(kernel, "j2:vec, j1:ilp, j3:ilp")
    kernel = lp.tag_array_axes(kernel, "temp1,temp2", "vec")

    # Printed only for manual inspection of the generated code.
    device_code = lp.generate_code_v2(kernel).device_code()
    print(device_code)
738+
739+
def test_omp_simd_tag():
    """Check that an ``lp.OpenMPSIMDTag`` iname emits ``#pragma omp simd``.

    The loop over ``i`` is split by 4 and the resulting ``i_inner`` iname is
    tagged with ``lp.OpenMPSIMDTag()``. The generated device code must then
    contain at least one line that is exactly ``#pragma omp simd`` once
    surrounding whitespace is stripped.
    """
    knl = lp.make_kernel(
        "{[i]: 0<=i<16}",
        """
        y[i] = 2 * x[i]
        """)

    knl = lp.add_dtypes(knl, {"x": "float64"})
    knl = lp.split_iname(knl, "i", 4)
    knl = lp.tag_inames(knl, {"i_inner": lp.OpenMPSIMDTag()})

    code_str = lp.generate_code_v2(knl).device_code()

    # Split on real line boundaries. Splitting on a newline followed by a
    # space would glue a pragma onto its neighbour whenever the line break is
    # not followed by a space, and the exact comparison below would miss it.
    assert any(line.strip() == "#pragma omp simd"
               for line in code_str.splitlines())
755+
756+
def test_vec_tag_with_omp_simd_fallback():
    """Check ``lp.VectorizeTag`` with an ``lp.OpenMPSIMDTag`` fallback.

    All three inner inames are tagged ``VectorizeTag(OpenMPSIMDTag())`` on an
    ``lp.ExecutableCVectorExtensionsTarget`` kernel. The test asserts that the
    generated code contains exactly two ``#pragma omp simd`` lines, then runs
    the kernel on a random 10x4 input and compares the output against
    ``2*x + 1``.
    """
    knl = lp.make_kernel(
        "{[i, j1, j2, j3]: 0<=i<10 and 0<=j1,j2,j3<4}",
        """
        <> temp1[j1] = x[i, j1]
        <> temp2[j2] = 2*temp1[j2] + 1 {inames=i:j2}
        y[i, j3] = temp2[j3]
        """,
        [lp.GlobalArg("x, y", shape=lp.auto, dtype=float)],
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    knl = lp.tag_inames(knl, {"j1": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j2": lp.VectorizeTag(lp.OpenMPSIMDTag()),
                              "j3": lp.VectorizeTag(lp.OpenMPSIMDTag())})
    knl = lp.tag_array_axes(knl, "temp1,temp2", "vec")

    code_str = lp.generate_code_v2(knl).device_code()

    # Count pragma lines on real line boundaries. Splitting on a newline
    # followed by a space can merge adjacent lines and skew an exact count.
    n_simd_pragmas = sum(1
                         for line in code_str.splitlines()
                         if line.strip() == "#pragma omp simd")
    assert n_simd_pragmas == 2

    x = np.random.rand(10, 4)
    _, (out,) = knl(x=x)
    np.testing.assert_allclose(out, 2*x + 1)
783+
784+
def test_vec_extensions_with_multiple_loopy_body_insns():
    """Run a vector-extensions kernel whose loop body has several instructions.

    The loop body holds two no-op instructions around a temporary assignment
    and a store into ``dat0``. The loop over ``n`` is split by 4 with slabs
    ``(1, 1)``, ``tmp`` is privatized over the inner iname and its axis tagged
    ``vec``, and the inner iname is tagged ``VectorizeTag(OpenMPSIMDTag())``.
    The kernel is executed with ``N=100`` and every output entry must be 2.
    """
    inner_iname = "n_batch"

    kernel = lp.make_kernel(
        "{[n]: 0<=n<N}",
        """
        for n
            ... nop {id=expr_start}
            <> tmp = 2.0
            dat0[n, 0] = tmp {id=expr_insn}
            ... nop {id=statement0}
        end
        """,
        seq_dependencies=True,
        target=lp.ExecutableCVectorExtensionsTarget())

    kernel = lp.add_dtypes(kernel, {"dat0": "float64"})
    kernel = lp.split_iname(kernel, "n", 4, slabs=(1, 1),
                            inner_iname=inner_iname)
    kernel = lp.privatize_temporaries_with_inames(kernel, inner_iname)
    kernel = lp.tag_array_axes(kernel, "tmp", "vec")

    simd_vec_tag = lp.VectorizeTag(lp.OpenMPSIMDTag())
    kernel = lp.tag_inames(kernel, {inner_iname: simd_vec_tag})

    _, (result,) = kernel(N=100)

    expected = 2*np.ones((100, 1))
    np.testing.assert_allclose(result, expected)
809+
810+
722811if __name__ == "__main__" :
723812 if len (sys .argv ) > 1 :
724813 exec (sys .argv [1 ])
0 commit comments