[llvm] d37ced8 - AMDGPU: refactor phi lowering from SILowerI1Copies (NFCI) (#75349)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 03:20:12 PST 2023


Author: Petar Avramovic
Date: 2023-12-15T12:20:07+01:00
New Revision: d37ced88809cb4d2df57ec80887b3f8801ca719b

URL: https://github.com/llvm/llvm-project/commit/d37ced88809cb4d2df57ec80887b3f8801ca719b
DIFF: https://github.com/llvm/llvm-project/commit/d37ced88809cb4d2df57ec80887b3f8801ca719b.diff

LOG: AMDGPU: refactor phi lowering from SILowerI1Copies (NFCI) (#75349)

Make abstract class PhiLoweringHelper and expose it for use in
GlobalISel path.
SILowerI1Copies implements PhiLoweringHelper as Vreg1LoweringHelper and
it is equivalent to SILowerI1Copies.
A notable change is that createLaneMaskReg now clones attributes from a
register that has lane-mask attributes, instead of creating a register with
the lane-mask register class. This is because lane masks have
different (more) attributes in GlobalISel.

patch 2 from: https://github.com/llvm/llvm-project/pull/73337

Added: 
    llvm/lib/Target/AMDGPU/SILowerI1Copies.h

Modified: 
    llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 68c8f4024e7300..cfa0c21def791d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,57 +21,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SILowerI1Copies.h"
 #include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Target/CGPassBuilderOption.h"
 
 #define DEBUG_TYPE "si-i1-copies"
 
 using namespace llvm;
 
-static unsigned createLaneMaskReg(MachineFunction &MF);
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs);
 
 namespace {
 
-struct Incoming {
-  Register Reg;
-  MachineBasicBlock *Block;
-  Register UpdatedReg;
-
-  Incoming(Register Reg, MachineBasicBlock *Block, Register UpdatedReg)
-      : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
-};
-
 class SILowerI1Copies : public MachineFunctionPass {
 public:
   static char ID;
 
-private:
-  bool IsWave32 = false;
-  MachineFunction *MF = nullptr;
-  MachineDominatorTree *DT = nullptr;
-  MachinePostDominatorTree *PDT = nullptr;
-  MachineRegisterInfo *MRI = nullptr;
-  const GCNSubtarget *ST = nullptr;
-  const SIInstrInfo *TII = nullptr;
-
-  unsigned ExecReg;
-  unsigned MovOp;
-  unsigned AndOp;
-  unsigned OrOp;
-  unsigned XorOp;
-  unsigned AndN2Op;
-  unsigned OrN2Op;
-
-  DenseSet<unsigned> ConstrainRegs;
-
-public:
   SILowerI1Copies() : MachineFunctionPass(ID) {
     initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
   }
@@ -86,29 +55,53 @@ class SILowerI1Copies : public MachineFunctionPass {
     AU.addRequired<MachinePostDominatorTree>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
+};
+
+class Vreg1LoweringHelper : public PhiLoweringHelper {
+public:
+  Vreg1LoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+                      MachinePostDominatorTree *PDT);
 
 private:
-  bool lowerCopiesFromI1();
-  bool lowerPhis();
-  bool lowerCopiesToI1();
-  bool isConstantLaneMask(Register Reg, bool &Val) const;
+  DenseSet<Register> ConstrainRegs;
+
+public:
+  void markAsLaneMask(Register DstReg) const override;
+  void getCandidatesForLowering(
+      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
+  void collectIncomingValuesFromPhi(
+      const MachineInstr *MI,
+      SmallVectorImpl<Incoming> &Incomings) const override;
+  void replaceDstReg(Register NewReg, Register OldReg,
+                     MachineBasicBlock *MBB) override;
   void buildMergeLaneMasks(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, const DebugLoc &DL,
-                           unsigned DstReg, unsigned PrevReg, unsigned CurReg);
-  MachineBasicBlock::iterator
-  getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+                           Register DstReg, Register PrevReg,
+                           Register CurReg) override;
+  void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
 
+  bool lowerCopiesFromI1();
+  bool lowerCopiesToI1();
+  bool cleanConstrainRegs(bool Changed);
   bool isVreg1(Register Reg) const {
     return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
   }
-
-  bool isLaneMaskReg(unsigned Reg) const {
-    return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
-           TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
-               ST->getWavefrontSize();
-  }
 };
 
+Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
+                                         MachineDominatorTree *DT,
+                                         MachinePostDominatorTree *PDT)
+    : PhiLoweringHelper(MF, DT, PDT) {}
+
+bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
+  assert(Changed || ConstrainRegs.empty());
+  for (Register Reg : ConstrainRegs)
+    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
+  ConstrainRegs.clear();
+
+  return Changed;
+}
+
 /// Helper class that determines the relationship between incoming values of a
 /// phi in the control flow graph to determine where an incoming value can
 /// simply be taken as a scalar lane mask as-is, and where it needs to be
@@ -311,6 +304,7 @@ class LoopFinder {
   /// blocks, so that the SSA updater doesn't have to search all the way to the
   /// function entry.
   void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+                      MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
                       ArrayRef<Incoming> Incomings = {}) {
     assert(LoopLevel < CommonDominators.size());
 
@@ -319,13 +313,15 @@ class LoopFinder {
       Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);
 
     if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
-      SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+      SSAUpdater.AddAvailableValue(
+          Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
     } else {
       // The dominator is part of the loop or the given blocks, so add the
       // undef value to unreachable predecessors instead.
       for (MachineBasicBlock *Pred : Dom->predecessors()) {
         if (!inLoopLevel(*Pred, LoopLevel, Incomings))
-          SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+          SSAUpdater.AddAvailableValue(
+              Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
       }
     }
   }
@@ -415,19 +411,19 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
   return new SILowerI1Copies();
 }
 
-static unsigned createLaneMaskReg(MachineFunction &MF) {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
-                                                 : &AMDGPU::SReg_64RegClass);
+Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
+                                 Register LaneMaskRegAttrs) {
+  return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
 }
 
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
-  MachineFunction &MF = *MBB.getParent();
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+                                    MachineRegisterInfo *MRI,
+                                    Register LaneMaskRegAttrs) {
+  MachineFunction &MF = *MBB->getParent();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
-  unsigned UndefReg = createLaneMaskReg(MF);
-  BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+  Register UndefReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+  BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
           UndefReg);
   return UndefReg;
 }
@@ -444,47 +440,17 @@ static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
 bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
   // Only need to run this in SelectionDAG path.
   if (TheMF.getProperties().hasProperty(
-        MachineFunctionProperties::Property::Selected))
+          MachineFunctionProperties::Property::Selected))
     return false;
 
-  MF = &TheMF;
-  MRI = &MF->getRegInfo();
-  DT = &getAnalysis<MachineDominatorTree>();
-  PDT = &getAnalysis<MachinePostDominatorTree>();
-
-  ST = &MF->getSubtarget<GCNSubtarget>();
-  TII = ST->getInstrInfo();
-  IsWave32 = ST->isWave32();
-
-  if (IsWave32) {
-    ExecReg = AMDGPU::EXEC_LO;
-    MovOp = AMDGPU::S_MOV_B32;
-    AndOp = AMDGPU::S_AND_B32;
-    OrOp = AMDGPU::S_OR_B32;
-    XorOp = AMDGPU::S_XOR_B32;
-    AndN2Op = AMDGPU::S_ANDN2_B32;
-    OrN2Op = AMDGPU::S_ORN2_B32;
-  } else {
-    ExecReg = AMDGPU::EXEC;
-    MovOp = AMDGPU::S_MOV_B64;
-    AndOp = AMDGPU::S_AND_B64;
-    OrOp = AMDGPU::S_OR_B64;
-    XorOp = AMDGPU::S_XOR_B64;
-    AndN2Op = AMDGPU::S_ANDN2_B64;
-    OrN2Op = AMDGPU::S_ORN2_B64;
-  }
+  Vreg1LoweringHelper Helper(&TheMF, &getAnalysis<MachineDominatorTree>(),
+                             &getAnalysis<MachinePostDominatorTree>());
 
   bool Changed = false;
-  Changed |= lowerCopiesFromI1();
-  Changed |= lowerPhis();
-  Changed |= lowerCopiesToI1();
-
-  assert(Changed || ConstrainRegs.empty());
-  for (unsigned Reg : ConstrainRegs)
-    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
-  ConstrainRegs.clear();
-
-  return Changed;
+  Changed |= Helper.lowerCopiesFromI1();
+  Changed |= Helper.lowerPhis();
+  Changed |= Helper.lowerCopiesToI1();
+  return Helper.cleanConstrainRegs(Changed);
 }
 
 #ifndef NDEBUG
@@ -496,7 +462,7 @@ static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
 }
 #endif
 
-bool SILowerI1Copies::lowerCopiesFromI1() {
+bool Vreg1LoweringHelper::lowerCopiesFromI1() {
   bool Changed = false;
   SmallVector<MachineInstr *, 4> DeadCopies;
 
@@ -539,23 +505,43 @@ bool SILowerI1Copies::lowerCopiesFromI1() {
   return Changed;
 }
 
-bool SILowerI1Copies::lowerPhis() {
+PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
+                                     MachineDominatorTree *DT,
+                                     MachinePostDominatorTree *PDT)
+    : MF(MF), DT(DT), PDT(PDT) {
+  MRI = &MF->getRegInfo();
+
+  ST = &MF->getSubtarget<GCNSubtarget>();
+  TII = ST->getInstrInfo();
+  IsWave32 = ST->isWave32();
+
+  if (IsWave32) {
+    ExecReg = AMDGPU::EXEC_LO;
+    MovOp = AMDGPU::S_MOV_B32;
+    AndOp = AMDGPU::S_AND_B32;
+    OrOp = AMDGPU::S_OR_B32;
+    XorOp = AMDGPU::S_XOR_B32;
+    AndN2Op = AMDGPU::S_ANDN2_B32;
+    OrN2Op = AMDGPU::S_ORN2_B32;
+  } else {
+    ExecReg = AMDGPU::EXEC;
+    MovOp = AMDGPU::S_MOV_B64;
+    AndOp = AMDGPU::S_AND_B64;
+    OrOp = AMDGPU::S_OR_B64;
+    XorOp = AMDGPU::S_XOR_B64;
+    AndN2Op = AMDGPU::S_ANDN2_B64;
+    OrN2Op = AMDGPU::S_ORN2_B64;
+  }
+}
+
+bool PhiLoweringHelper::lowerPhis() {
   MachineSSAUpdater SSAUpdater(*MF);
   LoopFinder LF(*DT, *PDT);
   PhiIncomingAnalysis PIA(*PDT, TII);
   SmallVector<MachineInstr *, 4> Vreg1Phis;
   SmallVector<Incoming, 4> Incomings;
 
-#ifndef NDEBUG
-  DenseSet<unsigned> PhiRegisters;
-#endif
-
-  for (MachineBasicBlock &MBB : *MF) {
-    for (MachineInstr &MI : MBB.phis()) {
-      if (isVreg1(MI.getOperand(0).getReg()))
-        Vreg1Phis.push_back(&MI);
-    }
-  }
+  getCandidatesForLowering(Vreg1Phis);
   if (Vreg1Phis.empty())
     return false;
 
@@ -571,28 +557,10 @@ bool SILowerI1Copies::lowerPhis() {
     LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI);
 
     Register DstReg = MI->getOperand(0).getReg();
-    MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
-                                      : &AMDGPU::SReg_64RegClass);
-
-    // Collect incoming values.
-    for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
-      assert(i + 1 < MI->getNumOperands());
-      Register IncomingReg = MI->getOperand(i).getReg();
-      MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
-      MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
-
-      if (IncomingDef->getOpcode() == AMDGPU::COPY) {
-        IncomingReg = IncomingDef->getOperand(1).getReg();
-        assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
-        assert(!IncomingDef->getOperand(1).getSubReg());
-      } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
-        continue;
-      } else {
-        assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
-      }
+    markAsLaneMask(DstReg);
+    initializeLaneMaskRegisterAttributes(DstReg);
 
-      Incomings.emplace_back(IncomingReg, IncomingMBB, Register{});
-    }
+    collectIncomingValuesFromPhi(MI, Incomings);
 
     // Sort the incomings such that incoming values that dominate other incoming
     // values are sorted earlier. This allows us to do some amount of on-the-fly
@@ -625,10 +593,11 @@ bool SILowerI1Copies::lowerPhis() {
     SSAUpdater.Initialize(DstReg);
 
     if (FoundLoopLevel) {
-      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, Incomings);
+      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
+                        Incomings);
 
       for (auto &Incoming : Incomings) {
-        Incoming.UpdatedReg = createLaneMaskReg(*MF);
+        Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
         SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
       }
 
@@ -644,14 +613,16 @@ bool SILowerI1Copies::lowerPhis() {
       PIA.analyze(MBB, Incomings);
 
       for (MachineBasicBlock *MBB : PIA.predecessors())
-        SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+        SSAUpdater.AddAvailableValue(
+            MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));
 
       for (auto &Incoming : Incomings) {
         MachineBasicBlock &IMBB = *Incoming.Block;
         if (PIA.isSource(IMBB)) {
+          constrainIncomingRegisterTakenAsIs(Incoming);
           SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
         } else {
-          Incoming.UpdatedReg = createLaneMaskReg(*MF);
+          Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
           SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
         }
       }
@@ -669,7 +640,7 @@ bool SILowerI1Copies::lowerPhis() {
 
     Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
     if (NewReg != DstReg) {
-      MRI->replaceRegWith(NewReg, DstReg);
+      replaceDstReg(NewReg, DstReg, &MBB);
       MI->eraseFromParent();
     }
 
@@ -678,7 +649,7 @@ bool SILowerI1Copies::lowerPhis() {
   return true;
 }
 
-bool SILowerI1Copies::lowerCopiesToI1() {
+bool Vreg1LoweringHelper::lowerCopiesToI1() {
   bool Changed = false;
   MachineSSAUpdater SSAUpdater(*MF);
   LoopFinder LF(*DT, *PDT);
@@ -705,8 +676,9 @@ bool SILowerI1Copies::lowerCopiesToI1() {
 
       LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
 
-      MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
-                                        : &AMDGPU::SReg_64RegClass);
+      markAsLaneMask(DstReg);
+      initializeLaneMaskRegisterAttributes(DstReg);
+
       if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
         continue;
 
@@ -716,7 +688,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
 
       if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
         assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
-        unsigned TmpReg = createLaneMaskReg(*MF);
+        Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
             .addReg(SrcReg)
             .addImm(0);
@@ -739,7 +711,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
       if (FoundLoopLevel) {
         SSAUpdater.Initialize(DstReg);
         SSAUpdater.AddAvailableValue(&MBB, DstReg);
-        LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+        LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
 
         buildMergeLaneMasks(MBB, MI, DL, DstReg,
                             SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
@@ -754,7 +726,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
   return Changed;
 }
 
-bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
+bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
   const MachineInstr *MI;
   for (;;) {
     MI = MRI->getUniqueVRegDef(Reg);
@@ -807,7 +779,7 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
 /// Return a point at the end of the given \p MBB to insert SALU instructions
 /// for lane mask calculation. Take terminators and SCC into account.
 MachineBasicBlock::iterator
-SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+PhiLoweringHelper::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
   auto InsertionPt = MBB.getFirstTerminator();
   bool TerminatorsUseSCC = false;
   for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
@@ -833,10 +805,53 @@ SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
   llvm_unreachable("SCC used by terminator but no def in block");
 }
 
-void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          const DebugLoc &DL, unsigned DstReg,
-                                          unsigned PrevReg, unsigned CurReg) {
+// VReg_1 -> SReg_32 or SReg_64
+void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
+  MRI->setRegClass(DstReg, ST->getBoolRC());
+}
+
+void Vreg1LoweringHelper::getCandidatesForLowering(
+    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB.phis()) {
+      if (isVreg1(MI.getOperand(0).getReg()))
+        Vreg1Phis.push_back(&MI);
+    }
+  }
+}
+
+void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
+    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
+  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
+    assert(i + 1 < MI->getNumOperands());
+    Register IncomingReg = MI->getOperand(i).getReg();
+    MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
+    MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+    if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+      IncomingReg = IncomingDef->getOperand(1).getReg();
+      assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
+      assert(!IncomingDef->getOperand(1).getSubReg());
+    } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+      continue;
+    } else {
+      assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
+    }
+
+    Incomings.emplace_back(IncomingReg, IncomingMBB, Register());
+  }
+}
+
+void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
+                                        MachineBasicBlock *MBB) {
+  MRI->replaceRegWith(NewReg, OldReg);
+}
+
+void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
+                                              MachineBasicBlock::iterator I,
+                                              const DebugLoc &DL,
+                                              Register DstReg, Register PrevReg,
+                                              Register CurReg) {
   bool PrevVal = false;
   bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
   bool CurVal = false;
@@ -855,13 +870,13 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
     return;
   }
 
-  unsigned PrevMaskedReg = 0;
-  unsigned CurMaskedReg = 0;
+  Register PrevMaskedReg;
+  Register CurMaskedReg;
   if (!PrevConstant) {
     if (CurConstant && CurVal) {
       PrevMaskedReg = PrevReg;
     } else {
-      PrevMaskedReg = createLaneMaskReg(*MF);
+      PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
       BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
           .addReg(PrevReg)
           .addReg(ExecReg);
@@ -872,7 +887,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
     if (PrevConstant && PrevVal) {
       CurMaskedReg = CurReg;
     } else {
-      CurMaskedReg = createLaneMaskReg(*MF);
+      CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
       BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
           .addReg(CurReg)
           .addReg(ExecReg);
@@ -895,3 +910,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
         .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
   }
 }
+
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+  return;
+}

diff  --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
new file mode 100644
index 00000000000000..5099d39c2d1415
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -0,0 +1,97 @@
+//===-- SILowerI1Copies.h --------------------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Interface definition of the PhiLoweringHelper class that implements lane
+/// mask merging algorithm for divergent i1 phis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+
+namespace llvm {
+
+/// Incoming for lane mask phi as machine instruction, incoming register \p Reg
+/// and incoming block \p Block are taken from machine instruction.
+/// \p UpdatedReg (if valid) is \p Reg lane mask merged with another lane mask.
+struct Incoming {
+  Register Reg;
+  MachineBasicBlock *Block;
+  Register UpdatedReg;
+
+  Incoming(Register Reg, MachineBasicBlock *Block, Register UpdatedReg)
+      : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
+};
+
+Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
+
+class PhiLoweringHelper {
+public:
+  PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+                    MachinePostDominatorTree *PDT);
+  virtual ~PhiLoweringHelper() = default;
+
+protected:
+  bool IsWave32 = false;
+  MachineFunction *MF = nullptr;
+  MachineDominatorTree *DT = nullptr;
+  MachinePostDominatorTree *PDT = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const GCNSubtarget *ST = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  Register LaneMaskRegAttrs;
+
+#ifndef NDEBUG
+  DenseSet<Register> PhiRegisters;
+#endif
+
+  Register ExecReg;
+  unsigned MovOp;
+  unsigned AndOp;
+  unsigned OrOp;
+  unsigned XorOp;
+  unsigned AndN2Op;
+  unsigned OrN2Op;
+
+public:
+  bool lowerPhis();
+  bool isConstantLaneMask(Register Reg, bool &Val) const;
+  MachineBasicBlock::iterator
+  getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+
+  void initializeLaneMaskRegisterAttributes(Register LaneMask) {
+    LaneMaskRegAttrs = LaneMask;
+  }
+
+  bool isLaneMaskReg(Register Reg) const {
+    return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
+           TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
+               ST->getWavefrontSize();
+  }
+
+  // Helpers from lowerPhis that are different between sdag and global-isel.
+
+  virtual void markAsLaneMask(Register DstReg) const = 0;
+  virtual void getCandidatesForLowering(
+      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const = 0;
+  virtual void
+  collectIncomingValuesFromPhi(const MachineInstr *MI,
+                               SmallVectorImpl<Incoming> &Incomings) const = 0;
+  virtual void replaceDstReg(Register NewReg, Register OldReg,
+                             MachineBasicBlock *MBB) = 0;
+  virtual void buildMergeLaneMasks(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   const DebugLoc &DL, Register DstReg,
+                                   Register PrevReg, Register CurReg) = 0;
+  virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
+};
+
+} // end namespace llvm


        


More information about the llvm-commits mailing list