[llvm] 034fe0e - [AMDGPU] Added udot2 op_sel test. NFC.
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 9 12:30:04 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-04-09T12:19:42-07:00
New Revision: 034fe0e03d9eb3c4690012fbe095486f049670ca
URL: https://github.com/llvm/llvm-project/commit/034fe0e03d9eb3c4690012fbe095486f049670ca
DIFF: https://github.com/llvm/llvm-project/commit/034fe0e03d9eb3c4690012fbe095486f049670ca.diff
LOG: [AMDGPU] Added udot2 op_sel test. NFC.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index d1f1b16a612f4..5a413ebc18cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -3,8 +3,9 @@
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
+declare i32 @llvm.amdgcn.workitem.id.x()
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp:
; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp(
@@ -21,7 +22,7 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp:
; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp(
@@ -37,3 +38,23 @@ entry:
store i32 %r.val, i32 addrspace(1)* %r
ret void
}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_op_sel:
+; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
+; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot2_op_sel( ; calls udot2 with a constant %a and a %b whose two i16 elements are swapped
+ i32 addrspace(1)* %r, ; where the i32 result is stored
+ <2 x i16> addrspace(1)* %b, ; base pointer of <2 x i16> values, indexed by workitem id below
+ i32 %c) { ; passed through as the i32 %c operand of udot2
+entry:
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x() ; workitem id in x, used as the index into %b
+ %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i32 %id ; address of the %id-th <2 x i16> at %b
+ %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
+ %b.elt0 = extractelement <2 x i16> %b.val, i32 0
+ %b.elt1 = extractelement <2 x i16> %b.val, i32 1
+ %b0 = insertelement <2 x i16> undef, i16 %b.elt1, i32 0 ; rebuild the vector with the elements exchanged: old element 1 goes to index 0
+ %b1 = insertelement <2 x i16> %b0, i16 %b.elt0, i32 1 ; old element 0 goes to index 1, so %b1 = <elt1, elt0>
+ %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 1, i16 1>, <2 x i16> %b1, i32 %c, i1 0) ; %a is the constant <1, 1>; the i1 %clamp argument is 0
+ store i32 %r.val, i32 addrspace(1)* %r
+ ret void
+}
More information about the llvm-commits
mailing list