[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)

Akshat Oke via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Mar 11 23:03:34 PDT 2025


https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130059

>From ee0ed7e6fdce69d98a05e42327a305228797a9de Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 10:52:00 +0000
Subject: [PATCH 1/4] [AMDGPU][NPM] Port GCNCreateVOPD to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  7 ++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  4 +-
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp      | 53 ++++++++++++-------
 4 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 00355d8fb5e5f..95340f1287d8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -358,6 +358,11 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
   PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
 };
 
+class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -443,7 +448,7 @@ extern char &SIFormMemoryClausesID;
 void initializeSIPostRABundlerLegacyPass(PassRegistry &);
 extern char &SIPostRABundlerLegacyID;
 
-void initializeGCNCreateVOPDPass(PassRegistry &);
+void initializeGCNCreateVOPDLegacyPass(PassRegistry &);
 extern char &GCNCreateVOPDID;
 
 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 16ae23133a549..98b0bc7358e9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
+MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 856b5eb359c49..b06e87baa4ea9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -547,7 +547,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
   initializeSIPostRABundlerLegacyPass(*PR);
-  initializeGCNCreateVOPDPass(*PR);
+  initializeGCNCreateVOPDLegacyPass(*PR);
   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
   initializeAMDGPUAAWrapperPassPass(*PR);
   initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -2150,7 +2150,7 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
 
 void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
-    // TODO: addPass(GCNCreateVOPDPass());
+    addPass(GCNCreateVOPDPass());
   }
   // TODO: addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 798279b279da3..32a26469d616b 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "gcn-create-vopd"
@@ -36,7 +37,7 @@ using namespace llvm;
 
 namespace {
 
-class GCNCreateVOPD : public MachineFunctionPass {
+class GCNCreateVOPD {
 private:
   class VOPDCombineInfo {
   public:
@@ -49,20 +50,8 @@ class GCNCreateVOPD : public MachineFunctionPass {
   };
 
 public:
-  static char ID;
   const GCNSubtarget *ST = nullptr;
 
-  GCNCreateVOPD() : MachineFunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  StringRef getPassName() const override {
-    return "GCN Create VOPD Instructions";
-  }
-
   bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
     auto *FirstMI = CI.FirstMI;
     auto *SecondMI = CI.SecondMI;
@@ -112,9 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass {
     return true;
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override {
-    if (skipFunction(MF.getFunction()))
-      return false;
+  bool run(MachineFunction &MF) {
     ST = &MF.getSubtarget<GCNSubtarget>();
     if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
       return false;
@@ -163,11 +150,39 @@ class GCNCreateVOPD : public MachineFunctionPass {
   }
 };
 
+class GCNCreateVOPDLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return "GCN Create VOPD Instructions";
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    return GCNCreateVOPD().run(MF);
+  }
+};
+
 } // namespace
 
-char GCNCreateVOPD::ID = 0;
+PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &AM) {
+  if (!GCNCreateVOPD().run(MF))
+    return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
+}
+
+char GCNCreateVOPDLegacy::ID = 0;
 
-char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
+char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
 
-INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
+INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
                 false, false)

>From 94ec994f55a8b02e8b070e82d6253eb9de67ca97 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 10 Mar 2025 04:27:24 +0000
Subject: [PATCH 2/4] clang format

---
 llvm/lib/Target/AMDGPU/AMDGPU.h          |   3 +-
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 186 ++++++++++++-----------
 2 files changed, 97 insertions(+), 92 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 95340f1287d8d..96f23432685de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -360,7 +360,8 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
 
 class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
 public:
-  PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM);
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &AM);
 };
 
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 32a26469d616b..22123f738c948 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -49,105 +49,108 @@ class GCNCreateVOPD {
     MachineInstr *SecondMI;
   };
 
-public:
-  const GCNSubtarget *ST = nullptr;
-
-  bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
-    auto *FirstMI = CI.FirstMI;
-    auto *SecondMI = CI.SecondMI;
-    unsigned Opc1 = FirstMI->getOpcode();
-    unsigned Opc2 = SecondMI->getOpcode();
-    unsigned EncodingFamily =
-        AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
-    int NewOpcode =
-        AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
-                            AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
-    assert(NewOpcode != -1 &&
-           "Should have previously determined this as a possible VOPD\n");
-
-    auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
-                            FirstMI->getDebugLoc(), SII->get(NewOpcode))
-                        .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
-    namespace VOPD = AMDGPU::VOPD;
-    MachineInstr *MI[] = {FirstMI, SecondMI};
-    auto InstInfo =
-        AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
-    for (auto CompIdx : VOPD::COMPONENTS) {
-      auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
-      VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-    }
-
-    for (auto CompIdx : VOPD::COMPONENTS) {
-      auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
-      for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
-        auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+  public:
+    const GCNSubtarget *ST = nullptr;
+
+    bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+      auto *FirstMI = CI.FirstMI;
+      auto *SecondMI = CI.SecondMI;
+      unsigned Opc1 = FirstMI->getOpcode();
+      unsigned Opc2 = SecondMI->getOpcode();
+      unsigned EncodingFamily =
+          AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+      int NewOpcode =
+          AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+                              AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+      assert(NewOpcode != -1 &&
+             "Should have previously determined this as a possible VOPD\n");
+
+      auto VOPDInst =
+          BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
+                  SII->get(NewOpcode))
+              .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+      namespace VOPD = AMDGPU::VOPD;
+      MachineInstr *MI[] = {FirstMI, SecondMI};
+      auto InstInfo =
+          AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+      for (auto CompIdx : VOPD::COMPONENTS) {
+        auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
         VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
       }
-    }
 
-    SII->fixImplicitOperands(*VOPDInst);
-    for (auto CompIdx : VOPD::COMPONENTS)
-      VOPDInst.copyImplicitOps(*MI[CompIdx]);
+      for (auto CompIdx : VOPD::COMPONENTS) {
+        auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+        for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
+             ++CompSrcIdx) {
+          auto MCOprIdx =
+              InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+          VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+        }
+      }
 
-    LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
-                      << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+      SII->fixImplicitOperands(*VOPDInst);
+      for (auto CompIdx : VOPD::COMPONENTS)
+        VOPDInst.copyImplicitOps(*MI[CompIdx]);
 
-    for (auto CompIdx : VOPD::COMPONENTS)
-      MI[CompIdx]->eraseFromParent();
+      LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+                        << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
 
-    ++NumVOPDCreated;
-    return true;
-  }
+      for (auto CompIdx : VOPD::COMPONENTS)
+        MI[CompIdx]->eraseFromParent();
 
-  bool run(MachineFunction &MF) {
-    ST = &MF.getSubtarget<GCNSubtarget>();
-    if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
-      return false;
-    LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
-    const SIInstrInfo *SII = ST->getInstrInfo();
-    bool Changed = false;
-
-    SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
-    for (auto &MBB : MF) {
-      auto MII = MBB.begin(), E = MBB.end();
-      while (MII != E) {
-        auto *FirstMI = &*MII;
-        MII = next_nodbg(MII, MBB.end());
-        if (MII == MBB.end())
-          break;
-        if (FirstMI->isDebugInstr())
-          continue;
-        auto *SecondMI = &*MII;
-        unsigned Opc = FirstMI->getOpcode();
-        unsigned Opc2 = SecondMI->getOpcode();
-        llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
-        llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
-        VOPDCombineInfo CI;
-
-        if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
-          CI = VOPDCombineInfo(FirstMI, SecondMI);
-        else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
-          CI = VOPDCombineInfo(SecondMI, FirstMI);
-        else
-          continue;
-        // checkVOPDRegConstraints cares about program order, but doReplace
-        // cares about X-Y order in the constituted VOPD
-        if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
-          ReplaceCandidates.push_back(CI);
-          ++MII;
+      ++NumVOPDCreated;
+      return true;
+    }
+
+    bool run(MachineFunction &MF) {
+      ST = &MF.getSubtarget<GCNSubtarget>();
+      if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+        return false;
+      LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+      const SIInstrInfo *SII = ST->getInstrInfo();
+      bool Changed = false;
+
+      SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+      for (auto &MBB : MF) {
+        auto MII = MBB.begin(), E = MBB.end();
+        while (MII != E) {
+          auto *FirstMI = &*MII;
+          MII = next_nodbg(MII, MBB.end());
+          if (MII == MBB.end())
+            break;
+          if (FirstMI->isDebugInstr())
+            continue;
+          auto *SecondMI = &*MII;
+          unsigned Opc = FirstMI->getOpcode();
+          unsigned Opc2 = SecondMI->getOpcode();
+          llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+          llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+          VOPDCombineInfo CI;
+
+          if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+            CI = VOPDCombineInfo(FirstMI, SecondMI);
+          else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+            CI = VOPDCombineInfo(SecondMI, FirstMI);
+          else
+            continue;
+          // checkVOPDRegConstraints cares about program order, but doReplace
+          // cares about X-Y order in the constituted VOPD
+          if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+            ReplaceCandidates.push_back(CI);
+            ++MII;
+          }
         }
       }
-    }
-    for (auto &CI : ReplaceCandidates) {
-      Changed |= doReplace(SII, CI);
-    }
+      for (auto &CI : ReplaceCandidates) {
+        Changed |= doReplace(SII, CI);
+      }
 
-    return Changed;
-  }
+      return Changed;
+    }
 };
 
 class GCNCreateVOPDLegacy : public MachineFunctionPass {
@@ -173,8 +176,9 @@ class GCNCreateVOPDLegacy : public MachineFunctionPass {
 
 } // namespace
 
-PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
-                                         MachineFunctionAnalysisManager &AM) {
+PreservedAnalyses
+llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+                             MachineFunctionAnalysisManager &AM) {
   if (!GCNCreateVOPD().run(MF))
     return PreservedAnalyses::all();
   return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();

>From 612e0ab5752d887dae8104c9ed5a208e3a0be940 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 09:03:31 +0000
Subject: [PATCH 3/4] add test

---
 llvm/test/CodeGen/AMDGPU/vopd-combine.mir | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 8d5060177c63d..5a13401c1631c 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -4,6 +4,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
 
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+
 --- |
   @lds = external addrspace(3) global [8 x i8]
   define void @vopd_schedule() { ret void }

>From 1aeebc20c7edad2e6b6e2b1d410fb778016fc304 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 12 Mar 2025 06:03:07 +0000
Subject: [PATCH 4/4] format and sort registry

---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   2 +-
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp      | 181 +++++++++---------
 2 files changed, 90 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 98b0bc7358e9d..b1dba132e5bf6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -103,8 +103,8 @@ MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass())
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
-MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 22123f738c948..ccc711a0bcc4e 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -49,108 +49,105 @@ class GCNCreateVOPD {
     MachineInstr *SecondMI;
   };
 
-  public:
-    const GCNSubtarget *ST = nullptr;
-
-    bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
-      auto *FirstMI = CI.FirstMI;
-      auto *SecondMI = CI.SecondMI;
-      unsigned Opc1 = FirstMI->getOpcode();
-      unsigned Opc2 = SecondMI->getOpcode();
-      unsigned EncodingFamily =
-          AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
-      int NewOpcode =
-          AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
-                              AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
-      assert(NewOpcode != -1 &&
-             "Should have previously determined this as a possible VOPD\n");
-
-      auto VOPDInst =
-          BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
-                  SII->get(NewOpcode))
-              .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
-      namespace VOPD = AMDGPU::VOPD;
-      MachineInstr *MI[] = {FirstMI, SecondMI};
-      auto InstInfo =
-          AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
-      for (auto CompIdx : VOPD::COMPONENTS) {
-        auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
-        VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-      }
+public:
+  const GCNSubtarget *ST = nullptr;
+
+  bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+    auto *FirstMI = CI.FirstMI;
+    auto *SecondMI = CI.SecondMI;
+    unsigned Opc1 = FirstMI->getOpcode();
+    unsigned Opc2 = SecondMI->getOpcode();
+    unsigned EncodingFamily =
+        AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+    int NewOpcode =
+        AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+                            AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+    assert(NewOpcode != -1 &&
+           "Should have previously determined this as a possible VOPD\n");
+
+    auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
+                            FirstMI->getDebugLoc(), SII->get(NewOpcode))
+                        .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+    namespace VOPD = AMDGPU::VOPD;
+    MachineInstr *MI[] = {FirstMI, SecondMI};
+    auto InstInfo =
+        AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+    for (auto CompIdx : VOPD::COMPONENTS) {
+      auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
+      VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+    }
 
-      for (auto CompIdx : VOPD::COMPONENTS) {
-        auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
-        for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
-             ++CompSrcIdx) {
-          auto MCOprIdx =
-              InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
-          VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-        }
+    for (auto CompIdx : VOPD::COMPONENTS) {
+      auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+      for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
+        auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+        VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
       }
+    }
 
-      SII->fixImplicitOperands(*VOPDInst);
-      for (auto CompIdx : VOPD::COMPONENTS)
-        VOPDInst.copyImplicitOps(*MI[CompIdx]);
+    SII->fixImplicitOperands(*VOPDInst);
+    for (auto CompIdx : VOPD::COMPONENTS)
+      VOPDInst.copyImplicitOps(*MI[CompIdx]);
 
-      LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
-                        << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+    LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+                      << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
 
-      for (auto CompIdx : VOPD::COMPONENTS)
-        MI[CompIdx]->eraseFromParent();
+    for (auto CompIdx : VOPD::COMPONENTS)
+      MI[CompIdx]->eraseFromParent();
 
-      ++NumVOPDCreated;
-      return true;
-    }
+    ++NumVOPDCreated;
+    return true;
+  }
 
-    bool run(MachineFunction &MF) {
-      ST = &MF.getSubtarget<GCNSubtarget>();
-      if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
-        return false;
-      LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
-      const SIInstrInfo *SII = ST->getInstrInfo();
-      bool Changed = false;
-
-      SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
-      for (auto &MBB : MF) {
-        auto MII = MBB.begin(), E = MBB.end();
-        while (MII != E) {
-          auto *FirstMI = &*MII;
-          MII = next_nodbg(MII, MBB.end());
-          if (MII == MBB.end())
-            break;
-          if (FirstMI->isDebugInstr())
-            continue;
-          auto *SecondMI = &*MII;
-          unsigned Opc = FirstMI->getOpcode();
-          unsigned Opc2 = SecondMI->getOpcode();
-          llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
-          llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
-          VOPDCombineInfo CI;
-
-          if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
-            CI = VOPDCombineInfo(FirstMI, SecondMI);
-          else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
-            CI = VOPDCombineInfo(SecondMI, FirstMI);
-          else
-            continue;
-          // checkVOPDRegConstraints cares about program order, but doReplace
-          // cares about X-Y order in the constituted VOPD
-          if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
-            ReplaceCandidates.push_back(CI);
-            ++MII;
-          }
+  bool run(MachineFunction &MF) {
+    ST = &MF.getSubtarget<GCNSubtarget>();
+    if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+      return false;
+    LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+    const SIInstrInfo *SII = ST->getInstrInfo();
+    bool Changed = false;
+
+    SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+    for (auto &MBB : MF) {
+      auto MII = MBB.begin(), E = MBB.end();
+      while (MII != E) {
+        auto *FirstMI = &*MII;
+        MII = next_nodbg(MII, MBB.end());
+        if (MII == MBB.end())
+          break;
+        if (FirstMI->isDebugInstr())
+          continue;
+        auto *SecondMI = &*MII;
+        unsigned Opc = FirstMI->getOpcode();
+        unsigned Opc2 = SecondMI->getOpcode();
+        llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+        llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+        VOPDCombineInfo CI;
+
+        if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+          CI = VOPDCombineInfo(FirstMI, SecondMI);
+        else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+          CI = VOPDCombineInfo(SecondMI, FirstMI);
+        else
+          continue;
+        // checkVOPDRegConstraints cares about program order, but doReplace
+        // cares about X-Y order in the constituted VOPD
+        if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+          ReplaceCandidates.push_back(CI);
+          ++MII;
         }
       }
-      for (auto &CI : ReplaceCandidates) {
-        Changed |= doReplace(SII, CI);
-      }
-
-      return Changed;
     }
+    for (auto &CI : ReplaceCandidates) {
+      Changed |= doReplace(SII, CI);
+    }
+
+    return Changed;
+  }
 };
 
 class GCNCreateVOPDLegacy : public MachineFunctionPass {



More information about the llvm-branch-commits mailing list