[llvm] 6afc4b0 - [AMDGPU] WQM: Ensure exact mode placement before branches

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 6 02:14:54 PDT 2023


Author: Carl Ritson
Date: 2023-06-06T18:11:35+09:00
New Revision: 6afc4b0629c8dc26236af72688b8c036cf090c32

URL: https://github.com/llvm/llvm-project/commit/6afc4b0629c8dc26236af72688b8c036cf090c32
DIFF: https://github.com/llvm/llvm-project/commit/6afc4b0629c8dc26236af72688b8c036cf090c32.diff

LOG: [AMDGPU] WQM: Ensure exact mode placement before branches

Fix for D151797, which accidentally allowed the exit to exact mode
to be placed between branch instructions.

Reviewed By: dstuttard

Differential Revision: https://reviews.llvm.org/D152228

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
    llvm/test/CodeGen/AMDGPU/wqm-terminators.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a0500cbd4cd39..c93b2382e2f3c 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1373,6 +1373,10 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
         Needs = StateExact | StateWQM | StateStrict;
       }
 
+      // Exact mode exit can occur in terminators, but must be before branches.
+      if (MI.isBranch() && OutNeeds == StateExact)
+        Needs = StateExact;
+
       ++Next;
     } else {
       // End of basic block

diff  --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 344c9997860db..059fdca13fae2 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -36,9 +36,11 @@ body: |
   ; CHECK-NEXT:   S_CMP_EQ_U32 [[COPY1]], 0, implicit-def $scc
   ; CHECK-NEXT:   undef %5.sub0:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   %5.sub1:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+  ; CHECK-NEXT:   $exec_lo = S_AND_B32 $exec_lo, [[COPY]], implicit-def $scc
+  ; CHECK-NEXT:   $scc = COPY [[COPY3]]
   ; CHECK-NEXT:   [[IMAGE_SAMPLE_V3_V2_gfx10_:%[0-9]+]]:vreg_96 = IMAGE_SAMPLE_V3_V2_gfx10 %5, [[DEF]], [[DEF1]], 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
-  ; CHECK-NEXT:   $exec_lo = S_AND_B32_term $exec_lo, [[COPY]], implicit-def $scc
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:


        


More information about the llvm-commits mailing list