[llvm] 6afc4b0 - [AMDGPU] WQM: Ensure exact mode placement before branches
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 6 02:14:54 PDT 2023
Author: Carl Ritson
Date: 2023-06-06T18:11:35+09:00
New Revision: 6afc4b0629c8dc26236af72688b8c036cf090c32
URL: https://github.com/llvm/llvm-project/commit/6afc4b0629c8dc26236af72688b8c036cf090c32
DIFF: https://github.com/llvm/llvm-project/commit/6afc4b0629c8dc26236af72688b8c036cf090c32.diff
LOG: [AMDGPU] WQM: Ensure exact mode placement before branches
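Fix a regression introduced by D151797, which accidentally allowed the
transition to exact mode to be inserted between branch instructions.
The transition may occur among the terminators, but must come before any branch.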
Reviewed By: dstuttard
Differential Revision: https://reviews.llvm.org/D152228
Added:
Modified:
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a0500cbd4cd39..c93b2382e2f3c 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1373,6 +1373,10 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
Needs = StateExact | StateWQM | StateStrict;
}
+ // Exact mode exit can occur in terminators, but must be before branches.
+ if (MI.isBranch() && OutNeeds == StateExact)
+ Needs = StateExact;
+
++Next;
} else {
// End of basic block
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 344c9997860db..059fdca13fae2 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -36,9 +36,11 @@ body: |
; CHECK-NEXT: S_CMP_EQ_U32 [[COPY1]], 0, implicit-def $scc
; CHECK-NEXT: undef %5.sub0:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: %5.sub1:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; CHECK-NEXT: $exec_lo = S_AND_B32 $exec_lo, [[COPY]], implicit-def $scc
+ ; CHECK-NEXT: $scc = COPY [[COPY3]]
; CHECK-NEXT: [[IMAGE_SAMPLE_V3_V2_gfx10_:%[0-9]+]]:vreg_96 = IMAGE_SAMPLE_V3_V2_gfx10 %5, [[DEF]], [[DEF1]], 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
- ; CHECK-NEXT: $exec_lo = S_AND_B32_term $exec_lo, [[COPY]], implicit-def $scc
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
More information about the llvm-commits mailing list