|
| 1 | +;=========================== begin_copyright_notice ============================ |
| 2 | +; |
| 3 | +; Copyright (C) 2024 Intel Corporation |
| 4 | +; |
| 5 | +; SPDX-License-Identifier: MIT |
| 6 | +; |
| 7 | +;============================ end_copyright_notice ============================= |
| 8 | + |
| 9 | +; RUN: %opt %use_old_pass_manager% -loop-unroll -vc-peel-loops-dpas-null-acc=true -march=genx64 -mcpu=XeHPC -S < %s | FileCheck %s |
| 10 | + |
| 11 | +target datalayout = "e-p:64:64-p3:32:32-p6:32:32-i64:64-n8:16:32:64" |
| 12 | +target triple = "spir64-unknown-unknown" |
| 13 | + |
| 14 | +; Function Attrs: nofree nosync nounwind readnone |
| 15 | +declare <64 x i32> @llvm.genx.rdregioni.v64i32.v256i32.i16(<256 x i32>, i32, i32, i32, i16, i32) #0 |
| 16 | + |
| 17 | +; Function Attrs: nofree nosync nounwind readnone |
| 18 | +declare <128 x i32> @llvm.genx.rdregioni.v128i32.v512i32.i16(<512 x i32>, i32, i32, i32, i16, i32) #0 |
| 19 | + |
| 20 | +; Function Attrs: nofree nosync nounwind readnone |
| 21 | +declare <512 x i32> @llvm.genx.wrregioni.v512i32.v128i32.i16.i1(<512 x i32>, <128 x i32>, i32, i32, i32, i16, i32, i1) #0 |
| 22 | + |
| 23 | +; Function Attrs: nofree nosync nounwind readnone |
| 24 | +declare <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32>, <128 x i32>, <64 x i32>, i32, i32, i32, i32, i32, i32) #0 |
| 25 | + |
| 26 | +; Function Attrs: noinline nounwind. Test kernel: a 16-trip loop that accumulates four dpas2 tiles into a <512 x i32> phi whose initial value is zeroinitializer. |
| 27 | +define dllexport spir_kernel void @kernel(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i8 addrspace(1)* %2) local_unnamed_addr #1 !spirv.ParameterDecorations !8 !intel_reqd_sub_group_size !11 { |
| 28 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 29 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 30 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 31 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 32 | + %4 = ptrtoint i8 addrspace(1)* %1 to i64 |
| 33 | + %5 = ptrtoint i8 addrspace(1)* %2 to i64 |
| 34 | + br label %6 |
| 35 | + ; Loop header and body. The expected output has four dpas2 calls ahead of the accumulator phi (presumably the peeled first iteration) and four more after it. |
| 36 | +6: ; preds = %6, %3 |
| 37 | + ; CHECK: phi <512 x i32> |
| 38 | + |
| 39 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 40 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 41 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 42 | + ; CHECK: call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> |
| 43 | + ; Two address induction variables, the i32 trip counter, and the accumulator that enters the loop as zeroinitializer. |
| 44 | + %indvars.iv159 = phi i64 [ 0, %3 ], [ %indvars.iv.next160, %6 ] |
| 45 | + %indvars.iv = phi i64 [ 0, %3 ], [ %indvars.iv.next, %6 ] |
| 46 | + %.0140155 = phi i32 [ 0, %3 ], [ %23, %6 ] |
| 47 | + %phiacc = phi <512 x i32> [ zeroinitializer, %3 ], [ %dst3, %6 ] |
| 48 | + %7 = shl nsw i64 %indvars.iv159, 2 |
| 49 | + %8 = add i64 %7, %4 |
| 50 | + %9 = inttoptr i64 %8 to <256 x i32> addrspace(1)* |
| 51 | + %10 = load <256 x i32>, <256 x i32> addrspace(1)* %9, align 16 |
| 52 | + %11 = shl nsw i64 %indvars.iv, 2 |
| 53 | + %12 = add i64 %11, %5 |
| 54 | + %13 = inttoptr i64 %12 to <128 x i32> addrspace(1)* |
| 55 | + %14 = load <128 x i32>, <128 x i32> addrspace(1)* %13, align 16 |
| 56 | + %indvars.iv.next160 = add nuw nsw i64 %indvars.iv159, 256 |
| 57 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 128 |
| 58 | + ; Tile 0: the %10 region at offset 0 (%15) feeds the dpas2 that updates the accumulator region at offset 0. |
| 59 | + %15 = call <64 x i32> @llvm.genx.rdregioni.v64i32.v256i32.i16(<256 x i32> %10, i32 8, i32 8, i32 1, i16 0, i32 8) |
| 60 | + %acc0 = call <128 x i32> @llvm.genx.rdregioni.v128i32.v512i32.i16(<512 x i32> %phiacc, i32 1, i32 1, i32 0, i16 0, i32 0) |
| 61 | + %16 = call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> %acc0, <128 x i32> %14, <64 x i32> %15, i32 8, i32 8, i32 8, i32 8, i32 1, i32 1) |
| 62 | + %dst0 = call <512 x i32> @llvm.genx.wrregioni.v512i32.v128i32.i16.i1(<512 x i32> %phiacc, <128 x i32> %16, i32 1, i32 1, i32 0, i16 0, i32 0, i1 true) |
| 63 | + ; Tile 1: consumes its own %10 region (%17, offset 256) rather than reusing %15; accumulator region at offset 512. |
| 64 | + %17 = call <64 x i32> @llvm.genx.rdregioni.v64i32.v256i32.i16(<256 x i32> %10, i32 8, i32 8, i32 1, i16 256, i32 8) |
| 65 | + %acc1 = call <128 x i32> @llvm.genx.rdregioni.v128i32.v512i32.i16(<512 x i32> %dst0, i32 1, i32 1, i32 0, i16 512, i32 0) |
| 66 | + %18 = call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> %acc1, <128 x i32> %14, <64 x i32> %17, i32 8, i32 8, i32 8, i32 8, i32 1, i32 1) |
| 67 | + %dst1 = call <512 x i32> @llvm.genx.wrregioni.v512i32.v128i32.i16.i1(<512 x i32> %dst0, <128 x i32> %18, i32 1, i32 1, i32 0, i16 512, i32 0, i1 true) |
| 68 | + ; Tile 2: consumes its own %10 region (%19, offset 512); accumulator region at offset 1024. |
| 69 | + %19 = call <64 x i32> @llvm.genx.rdregioni.v64i32.v256i32.i16(<256 x i32> %10, i32 8, i32 8, i32 1, i16 512, i32 8) |
| 70 | + %acc2 = call <128 x i32> @llvm.genx.rdregioni.v128i32.v512i32.i16(<512 x i32> %dst1, i32 1, i32 1, i32 0, i16 1024, i32 0) |
| 71 | + %20 = call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> %acc2, <128 x i32> %14, <64 x i32> %19, i32 8, i32 8, i32 8, i32 8, i32 1, i32 1) |
| 72 | + %dst2 = call <512 x i32> @llvm.genx.wrregioni.v512i32.v128i32.i16.i1(<512 x i32> %dst1, <128 x i32> %20, i32 1, i32 1, i32 0, i16 1024, i32 0, i1 true) |
| 73 | + ; Tile 3: consumes its own %10 region (%21, offset 768); accumulator region at offset 1536. |
| 74 | + %21 = call <64 x i32> @llvm.genx.rdregioni.v64i32.v256i32.i16(<256 x i32> %10, i32 8, i32 8, i32 1, i16 768, i32 8) |
| 75 | + %acc3 = call <128 x i32> @llvm.genx.rdregioni.v128i32.v512i32.i16(<512 x i32> %dst2, i32 1, i32 1, i32 0, i16 1536, i32 0) |
| 76 | + %22 = call <128 x i32> @llvm.genx.dpas2.v128i32.v128i32.v128i32.v64i32(<128 x i32> %acc3, <128 x i32> %14, <64 x i32> %21, i32 8, i32 8, i32 8, i32 8, i32 1, i32 1) |
| 77 | + %dst3 = call <512 x i32> @llvm.genx.wrregioni.v512i32.v128i32.i16.i1(<512 x i32> %dst2, <128 x i32> %22, i32 1, i32 1, i32 0, i16 1536, i32 0, i1 true) |
| 78 | + ; Latch: the trip counter starts at 0 and the loop exits once it reaches 16. |
| 79 | + %23 = add nuw nsw i32 %.0140155, 1 |
| 80 | + %exitcond.not = icmp eq i32 %23, 16 |
| 81 | + br i1 %exitcond.not, label %24, label %6 |
| 82 | + ; Exit block: store the final accumulator value through %0. |
| 83 | +24: ; preds = %6 |
| 84 | + %res = phi <512 x i32> [ %dst3, %6 ] |
| 85 | + %25 = ptrtoint i8 addrspace(1)* %0 to i64 |
| 86 | + %26 = bitcast i8 addrspace(1)* %0 to <512 x i32> addrspace(1)* |
| 87 | + store <512 x i32> %res, <512 x i32> addrspace(1)* %26, align 16 |
| 88 | + ret void |
| 89 | +} |
| 90 | + |
| 91 | +attributes #0 = { nofree nosync nounwind readnone "target-cpu"="XeHPC" } |
| 92 | +attributes #1 = { noinline nounwind "CMGenxMain" "oclrt"="1" "target-cpu"="XeHPC" } |
| 93 | + |
| 94 | +!spirv.MemoryModel = !{!0} |
| 95 | +!opencl.enable.FP_CONTRACT = !{} |
| 96 | +!spirv.Source = !{!1} |
| 97 | +!opencl.spir.version = !{!2} |
| 98 | +!opencl.ocl.version = !{!1} |
| 99 | +!opencl.used.extensions = !{!3} |
| 100 | +!opencl.used.optional.core.features = !{!3} |
| 101 | +!spirv.Generator = !{!4} |
| 102 | +!genx.kernels = !{!5} |
| 103 | + |
| 104 | +!0 = !{i32 2, i32 2} |
| 105 | +!1 = !{i32 0, i32 0} |
| 106 | +!2 = !{i32 1, i32 2} |
| 107 | +!3 = !{} |
| 108 | +!4 = !{i16 6, i16 14} |
| 109 | +!5 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @kernel, !"kernel", !6, i32 0, i32 0, !6, !7, i32 0} |
| 110 | +!6 = !{i32 0, i32 0, i32 0} |
| 111 | +!7 = !{!"svmptr_t", !"svmptr_t", !"svmptr_t"} |
| 112 | +!8 = !{!9, !9, !9} |
| 113 | +!9 = !{!10} |
| 114 | +!10 = !{i32 5625, i32 0} |
| 115 | +!11 = !{i32 1} |
0 commit comments