[PATCH] D59543: [AMDGPU] Ban i8 min3 promotion.
Neil Henning via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 06:29:36 PDT 2019
sheredom created this revision.
sheredom added reviewers: arsenm, nhaehnle.
sheredom added a project: AMDGPU.
Herald added subscribers: llvm-commits, t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
I found a really weird WWM-related case where, as a result of the WWM transformations, our isel lowering tried to combine two min operations into a min3 for the i8 type, which our hardware doesn't support.
The new min3_i8.ll test case would previously spew the error:
PromoteIntegerResult #0: t69: i8 = SMIN3 t70, Constant:i8<0>, t68
That was before the simple fix to our isel lowering, which no longer forms min3/max3 for the i8 MVT.
Repository:
rL LLVM
https://reviews.llvm.org/D59543
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/min3_i8.ll
Index: test/CodeGen/AMDGPU/min3_i8.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/min3_i8.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: amdgpu_ps_main:
+define amdgpu_ps void @_amdgpu_ps_main(<2 x i32> %arg, i32 %arg1, i32 inreg %arg2) {
+ %tmp = bitcast <2 x i32> %arg to i64
+ %tmp23 = inttoptr i64 %tmp to [4294967295 x i8] addrspace(4)*
+ %tmp30 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp23, i64 0, i64 0
+ %tmp31 = bitcast i8 addrspace(4)* %tmp30 to <4 x i32> addrspace(4)*
+ %tmp37 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp31, align 16
+ %tmp50 = tail call i32 asm sideeffect "; %1", "=v,0"(i32 %arg1)
+ %tmp51 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp50, i32 127)
+ %tmp60 = bitcast i32 %tmp51 to <4 x i8>
+ %tmp61 = extractelement <4 x i8> %tmp60, i64 0
+ %tmp62 = icmp slt i8 0, %tmp61
+ %tmp63 = select i1 %tmp62, i8 0, i8 %tmp61
+ %tmp65 = bitcast i32 %arg1 to <4 x i8>
+ %tmp66 = extractelement <4 x i8> %tmp65, i64 0
+ %tmp67 = icmp slt i8 %tmp63, %tmp66
+ %tmp68 = select i1 %tmp67, i8 %tmp63, i8 %tmp66
+ %tmp69 = insertelement <4 x i8> undef, i8 %tmp68, i64 0
+ %tmp70 = bitcast <4 x i8> %tmp69 to i32
+ %tmp71 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 127, i32 %tmp70, i32 276, i32 15, i32 14, i1 false)
+ %tmp72 = bitcast i32 %tmp71 to <4 x i8>
+ %tmp73 = extractelement <4 x i8> %tmp72, i64 0
+ %tmp74 = icmp slt i8 %tmp68, %tmp73
+ %tmp75 = select i1 %tmp74, i8 %tmp68, i8 %tmp73
+ %tmp99 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %arg1)
+ br label %bb106
+
+bb106:
+ %tmp29 = icmp eq i32 %arg1, 0
+ br i1 %tmp29, label %._crit_edge, label %bb113
+
+bb113:
+ %tmp115 = tail call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp37, i32 0, i32 0, i1 false, i1 false)
+ %tmp116 = bitcast float %tmp115 to <4 x i8>
+ %tmp117 = extractelement <4 x i8> %tmp116, i32 0
+ %tmp118 = icmp sgt i8 0, %tmp117
+ br label %._crit_edge
+
+._crit_edge:
+
+; GCN: s_endpgm
+ ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1 immarg, i1 immarg)
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
+declare i32 @llvm.amdgcn.wwm.i32(i32)
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8133,9 +8133,8 @@
// Only do this if the inner op has one use since this will just increases
// register pressure for no benefit.
-
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
- !VT.isVector() && VT != MVT::f64 &&
+ !VT.isVector() && VT != MVT::f64 && VT != MVT::i8 &&
((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59543.191282.patch
Type: text/x-patch
Size: 3110 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190319/c0a240f8/attachment.bin>
More information about the llvm-commits
mailing list