[llvm] Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" (PR #78468)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 17 08:37:52 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Petar Avramovic (petar-avramovic)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->76145
---
Patch is 127.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78468.diff
17 Files Affected:
- (modified) llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h (-19)
- (modified) llvm/lib/CodeGen/MachineUniformityAnalysis.cpp (+19)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp (+1-151)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (-8)
- (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+5-3)
- (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.h (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir (+25-45)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir (+85-249)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir (+65-221)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir (+31-65)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (-1)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
index 1039ac4e5189b3..e6da099751e7ae 100644
--- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
const MachineDomTree &domTree, bool HasBranchDivergence);
-/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
-class MachineUniformityAnalysisPass : public MachineFunctionPass {
- MachineUniformityInfo UI;
-
-public:
- static char ID;
-
- MachineUniformityAnalysisPass();
-
- MachineUniformityInfo &getUniformityInfo() { return UI; }
- const MachineUniformityInfo &getUniformityInfo() const { return UI; }
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis
-};
-
} // namespace llvm
#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 131138e0649e4c..3e0fe2b1ba087f 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
namespace {
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+ MachineUniformityInfo UI;
+
+public:
+ static char ID;
+
+ MachineUniformityAnalysisPass();
+
+ MachineUniformityInfo &getUniformityInfo() { return UI; }
+ const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index 9ac74ef55b275f..4cd8b1ec1051f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -16,10 +16,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "SILowerI1Copies.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineUniformityAnalysis.h"
-#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
@@ -45,152 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-class DivergenceLoweringHelper : public PhiLoweringHelper {
-public:
- DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
- MachinePostDominatorTree *PDT,
- MachineUniformityInfo *MUI);
-
-private:
- MachineUniformityInfo *MUI = nullptr;
-
-public:
- void markAsLaneMask(Register DstReg) const override;
- void getCandidatesForLowering(
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
- void collectIncomingValuesFromPhi(
- const MachineInstr *MI,
- SmallVectorImpl<Incoming> &Incomings) const override;
- void replaceDstReg(Register NewReg, Register OldReg,
- MachineBasicBlock *MBB) override;
- void buildMergeLaneMasks(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, const DebugLoc &DL,
- Register DstReg, Register PrevReg,
- Register CurReg) override;
- void constrainAsLaneMask(Incoming &In) override;
-};
-
-DivergenceLoweringHelper::DivergenceLoweringHelper(
- MachineFunction *MF, MachineDominatorTree *DT,
- MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
- : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
-
-// _(s1) -> SReg_32/64(s1)
-void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
- assert(MRI->getType(DstReg) == LLT::scalar(1));
-
- if (MRI->getRegClassOrNull(DstReg)) {
- MRI->constrainRegClass(DstReg, ST->getBoolRC());
- return;
- }
-
- MRI->setRegClass(DstReg, ST->getBoolRC());
-}
-
-void DivergenceLoweringHelper::getCandidatesForLowering(
- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
- LLT S1 = LLT::scalar(1);
-
- // Add divergent i1 phis to the list
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB.phis()) {
- Register Dst = MI.getOperand(0).getReg();
- if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
- Vreg1Phis.push_back(&MI);
- }
- }
-}
-
-void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
- const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
- for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
- Incomings.emplace_back(MI->getOperand(i).getReg(),
- MI->getOperand(i + 1).getMBB(), Register());
- }
-}
-
-void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
- MachineBasicBlock *MBB) {
- BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
- .addReg(NewReg);
-}
-
-// Get pointers to build instruction just after MI (skips phis if needed)
-static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
-getInsertAfterPtrs(MachineInstr *MI) {
- MachineBasicBlock *InsertMBB = MI->getParent();
- return {InsertMBB,
- InsertMBB->SkipPHIsAndLabels(std::next(MI->getIterator()))};
-}
-
-// bb.previous
-// %PrevReg = ...
-//
-// bb.current
-// %CurReg = ...
-//
-// %DstReg - not defined
-//
-// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
-//
-// bb.previous
-// %PrevReg = ...
-// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
-//
-// bb.current
-// %CurReg = ...
-// %CurRegCopy:sreg_32(s1) = COPY %CurReg
-// ...
-// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
-// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
-// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
-//
-// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
-void DivergenceLoweringHelper::buildMergeLaneMasks(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
- Register DstReg, Register PrevReg, Register CurReg) {
- // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
- // TODO: check if inputs are constants or results of a compare.
-
- Register PrevRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- auto [PrevMBB, AfterPrevReg] = getInsertAfterPtrs(MRI->getVRegDef(PrevReg));
- BuildMI(*PrevMBB, AfterPrevReg, DL, TII->get(AMDGPU::COPY), PrevRegCopy)
- .addReg(PrevReg);
- Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
- .addReg(PrevRegCopy)
- .addReg(ExecReg);
-
- Register CurRegCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- auto [CurMBB, AfterCurReg] = getInsertAfterPtrs(MRI->getVRegDef(CurReg));
- BuildMI(*CurMBB, AfterCurReg, DL, TII->get(AMDGPU::COPY), CurRegCopy)
- .addReg(CurReg);
- Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
- .addReg(ExecReg)
- .addReg(CurRegCopy);
-
- BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
- .addReg(PrevMaskedReg)
- .addReg(CurMaskedReg);
-}
-
-void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
-
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
@@ -205,14 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
MachineFunction &MF) {
- MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
- MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
- MachineUniformityInfo &MUI =
- getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
-
- DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
-
- bool Changed = false;
- Changed |= Helper.lowerPhis();
- return Changed;
+ return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5c37f163e37075..1d31c6b8fde93a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -210,14 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI->getType(DefReg);
- // Lane mask PHIs, PHI where all register operands have sgpr register class
- // with S1 LLT, are already selected in divergence lowering pass.
- if (I.getOpcode() == AMDGPU::PHI) {
- assert(MRI->getType(DefReg) == LLT::scalar(1));
- assert(TRI.isSGPRClass(MRI->getRegClass(DefReg)));
- return true;
- }
-
if (DefTy == LLT::scalar(1)) {
if (!AllowRiskySelect) {
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index db06ef0250470d..cfa0c21def791d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
- void constrainAsLaneMask(Incoming &In) override;
+ void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
bool lowerCopiesFromI1();
bool lowerCopiesToI1();
@@ -619,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
- constrainAsLaneMask(Incoming);
+ constrainIncomingRegisterTakenAsIs(Incoming);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -911,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
}
}
-void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+ return;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index f0fe9bb4d03469..5099d39c2d1415 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -91,7 +91,7 @@ class PhiLoweringHelper {
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
Register PrevReg, Register CurReg) = 0;
- virtual void constrainAsLaneMask(Incoming &In) = 0;
+ virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 38a4e81b5c2596..7a68aec1a1c555 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
-; REQUIRES: do-not-run-me
; Divergent phis that don't require lowering using lane mask merging
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 56f2812b590a8d..d314ebe355f51d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -46,7 +46,7 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
@@ -126,7 +126,6 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2
@@ -137,17 +136,12 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
@@ -197,37 +191,30 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -292,28 +279,25 @@ body: |
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %39(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/78468
More information about the llvm-commits
mailing list