[llvm] 67cfefe - [AMDGPU] Fix WQM failure with single block inactive demote

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Thu May 6 05:03:04 PDT 2021


Author: Carl Ritson
Date: 2021-05-06T21:02:26+09:00
New Revision: 67cfefebbbbb3a5923c47c31293a8f76596de8be

URL: https://github.com/llvm/llvm-project/commit/67cfefebbbbb3a5923c47c31293a8f76596de8be
DIFF: https://github.com/llvm/llvm-project/commit/67cfefebbbbb3a5923c47c31293a8f76596de8be.diff

LOG: [AMDGPU] Fix WQM failure with single block inactive demote

Instruction test for inactive kill/demote needs to be based on
actual opcode not whether instruction would be lowered to demote.

Reviewed By: piotr

Differential Revision: https://reviews.llvm.org/D101966

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 51a019bb9d2a..38548eaf9478 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -923,7 +923,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
     } else {
       // Static: kill does nothing
       MachineInstr *NewTerm = nullptr;
-      if (IsDemote) {
+      if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
         LIS->RemoveMachineInstrFromMaps(MI);
       } else {
         assert(MBB.succ_size() == 1);

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
index e88b70fb449a..cff597bf77f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -1184,6 +1184,44 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
   ret void
 }
 
+define amdgpu_ps void @static_exact_nop(float %arg0, float %arg1) {
+; SI-LABEL: static_exact_nop:
+; SI:       ; %bb.0: ; %.entry
+; SI-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; SI-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
+; SI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: static_exact_nop:
+; GFX9:       ; %bb.0: ; %.entry
+; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX9-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-32-LABEL: static_exact_nop:
+; GFX10-32:       ; %bb.0: ; %.entry
+; GFX10-32-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0, v0
+; GFX10-32-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
+; GFX10-32-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
+; GFX10-32-NEXT:    s_endpgm
+;
+; GFX10-64-LABEL: static_exact_nop:
+; GFX10-64:       ; %bb.0: ; %.entry
+; GFX10-64-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
+; GFX10-64-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX10-64-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
+; GFX10-64-NEXT:    s_endpgm
+.entry:
+  %c0 = fcmp olt float %arg0, 0.000000e+00
+  %c1 = fcmp oge float %arg1, 0.0
+  call void @llvm.amdgcn.wqm.demote(i1 true)
+  %tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
+  call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+  ret void
+}
+
+
 declare void @llvm.amdgcn.wqm.demote(i1) #0
 declare i1 @llvm.amdgcn.live.mask() #0
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0


        


More information about the llvm-commits mailing list