[llvm] AMDGPU/NewPM Port GCNDPPCombine to NPM (PR #105816)

Wed Aug 28 23:44:22 PDT 2024

https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/105816

>From 28fee48eea727831b954064e91a20b7bcc41dd1f Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Fri, 23 Aug 2024 11:28:09 +0000
Subject: [PATCH 1/5] AMDGPU/NewPM Port GCNDPPCombine to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  4 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 +-
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp      | 51 ++++++++++++-----
 llvm/lib/Target/AMDGPU/GCNDPPCombine.h        | 55 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/dpp64_combine.mir    |  2 +
 llvm/test/CodeGen/AMDGPU/dpp_combine.mir      |  1 +
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir |  3 +
 .../AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir        |  1 +
 llvm/test/CodeGen/AMDGPU/vopc_dpp.mir         |  1 +
 10 files changed, 106 insertions(+), 18 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/GCNDPPCombine.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6eb641db076958..717e5f511ef2f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
-void initializeGCNDPPCombinePass(PassRegistry &);
-extern char &GCNDPPCombineID;
+void initializeGCNDPPCombineLegacyPass(PassRegistry &);
+extern char &GCNDPPCombineLegacyID;
 
 void initializeSIFoldOperandsLegacyPass(PassRegistry &);
 extern char &SIFoldOperandsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 10e394ed03df8f..9976a8199d7047 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,4 +98,5 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 86cc9d1ecde817..a769bc9e486573 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,6 +28,7 @@
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
@@ -403,7 +404,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeR600VectorRegMergerPass(*PR);
   initializeGlobalISel(*PR);
   initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
-  initializeGCNDPPCombinePass(*PR);
+  initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeSILowerWWMCopiesPass(*PR);
@@ -1273,7 +1274,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
   // XXX - Can we get away without running DeadMachineInstructionElim again?
   addPass(&SIFoldOperandsLegacyID);
   if (EnableDPPCombine)
-    addPass(&GCNDPPCombineID);
+    addPass(&GCNDPPCombineLegacyID);
   addPass(&SILoadStoreOptimizerID);
   if (isPassEnabled(EnableSDWAPeephole)) {
     addPass(&SIPeepholeSDWAID);
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 0ac079c69e605f..ce67cf077ce1f9 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -37,6 +37,7 @@
 // The mov_dpp instruction should reside in the same BB as all its uses
 //===----------------------------------------------------------------------===//
 
+#include "GCNDPPCombine.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
 
 namespace {
 
-class GCNDPPCombine : public MachineFunctionPass {
+class GCNDPPCombine {
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const GCNSubtarget *ST;
@@ -76,12 +77,17 @@ class GCNDPPCombine : public MachineFunctionPass {
 
   bool combineDPPMov(MachineInstr &MI) const;
 
+  int getDPPOp(unsigned Op, bool IsShrinkable) const;
+  bool isShrinkable(MachineInstr &MI) const;
+
+public:
+  bool run(MachineFunction &MF);
+};
+class GCNDPPCombineLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  GCNDPPCombine() : MachineFunctionPass(ID) {
-    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
-  }
+  GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -96,22 +102,19 @@ class GCNDPPCombine : public MachineFunctionPass {
     return MachineFunctionProperties()
       .set(MachineFunctionProperties::Property::IsSSA);
   }
-
-private:
-  int getDPPOp(unsigned Op, bool IsShrinkable) const;
-  bool isShrinkable(MachineInstr &MI) const;
 };
 
 } // end anonymous namespace
 
-INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
+                false)
 
-char GCNDPPCombine::ID = 0;
+char GCNDPPCombineLegacy::ID = 0;
 
-char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;
 
 FunctionPass *llvm::createGCNDPPCombinePass() {
-  return new GCNDPPCombine();
+  return new GCNDPPCombineLegacy();
 }
 
 bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
@@ -749,9 +752,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
   return !Rollback;
 }
 
-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction())) {
+    return false;
+  }
+  GCNDPPCombine Impl;
+  return Impl.run(MF);
+}
+
+bool GCNDPPCombine::run(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
-  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
+  if (!ST->hasDPP())
     return false;
 
   MRI = &MF.getRegInfo();
@@ -781,3 +792,15 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
   }
   return Changed;
 }
+
+PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &) {
+  GCNDPPCombine Impl;
+  bool Changed = Impl.run(MF);
+  if (!Changed) {
+    return PreservedAnalyses::all();
+  }
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
new file mode 100644
index 00000000000000..922e5b4de18e15
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,55 @@
+//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
+// operand. If any of the use instruction cannot be combined with the mov the
+// whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+//                            dpp_controls..., $row_mask, $bank_mask,
+//                            $bound_ctrl
+// $res = VALU $dpp_value [, src1]
+//
+// to
+//
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
+//
+// Combining rules :
+//
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+//    $combined_bound_ctrl = DPP_BOUND_ZERO
+//
+// if the VALU op is binary and
+//    $bound_ctrl==DPP_BOUND_OFF and
+//    $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+//    $combined_bound_ctrl = DPP_BOUND_OFF
+//
+// Otherwise cancel.
+//
+// The mov_dpp instruction should reside in the same BB as all its uses
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MAM);
+};
+
+} // end namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index 9a6a54bbc4e497..ecff9683bac732 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,7 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 # RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
 # GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a1c3970a5bae90..179d0becf6693a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 # old is undefined: only combine when masks are fully enabled and
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..3c20fdff9fbf66 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,9 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 7e286a4dd678eb..b1e23808e91a9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 
 ---
 name:            test_cvt_f32_bf8_byte0
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..a927b836280c2f 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 

>From 51eb2d1be26864fd65520f002f16e04a200a3828 Mon Sep 17 00:00:00 2001
From: Akshat Oke <76596238+Akshat-Oke at users.noreply.github.com>
Date: Sun, 25 Aug 2024 20:39:16 +0530
Subject: [PATCH 2/5] Replace -run-pass with -passes in all but one test

---
 llvm/test/CodeGen/AMDGPU/dpp64_combine.mir           | 2 --
 llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir       | 3 ---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir | 1 -
 llvm/test/CodeGen/AMDGPU/vopc_dpp.mir                | 1 -
 4 files changed, 7 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index ecff9683bac732..d16d45eef1e897 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,6 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 # RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 3c20fdff9fbf66..43355dc694dc62 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,8 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index b1e23808e91a9b..324c0037511e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,4 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index a927b836280c2f..3c1b3c95513169 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,4 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---

>From b3354db914787c7b4040b0556962427ae4f3f195 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 26 Aug 2024 05:40:42 +0000
Subject: [PATCH 3/5] newlines and PassInfoMixin

---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 2 +-
 llvm/lib/Target/AMDGPU/GCNDPPCombine.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index ce67cf077ce1f9..e167a581bd08ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -803,4 +803,4 @@ PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
   auto PA = getMachineFunctionPassPreservedAnalyses();
   PA.preserveSet<CFGAnalyses>();
   return PA;
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 922e5b4de18e15..8caccf119bdb19 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -44,7 +44,7 @@
 #include "llvm/CodeGen/MachinePassManager.h"
 
 namespace llvm {
-class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
+class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
 public:
   PreservedAnalyses run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MAM);
@@ -52,4 +52,4 @@ class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
 
 } // end namespace llvm
 
-#endif
\ No newline at end of file
+#endif

>From 59d603d12e30d01e9b8f67e073ab8fe348ff4096 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 28 Aug 2024 09:02:45 +0000
Subject: [PATCH 4/5] Apply suggestions

---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp      |  7 ++--
 llvm/lib/Target/AMDGPU/GCNDPPCombine.h        | 39 +++----------------
 llvm/test/CodeGen/AMDGPU/dpp64_combine.mir    |  4 +-
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir |  6 +--
 .../AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir        |  1 +
 llvm/test/CodeGen/AMDGPU/vopc_dpp.mir         |  2 +-
 6 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index e167a581bd08ea..eb6186eaf55ef5 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -83,6 +83,7 @@ class GCNDPPCombine {
 public:
   bool run(MachineFunction &MF);
 };
+
 class GCNDPPCombineLegacy : public MachineFunctionPass {
 public:
   static char ID;
@@ -756,8 +757,7 @@ bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction())) {
     return false;
   }
-  GCNDPPCombine Impl;
-  return Impl.run(MF);
+  return GCNDPPCombine().run(MF);
 }
 
 bool GCNDPPCombine::run(MachineFunction &MF) {
@@ -795,8 +795,7 @@ bool GCNDPPCombine::run(MachineFunction &MF) {
 
 PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
                                          MachineFunctionAnalysisManager &) {
-  GCNDPPCombine Impl;
-  bool Changed = Impl.run(MF);
+  bool Changed = GCNDPPCombine().run(MF);
   if (!Changed) {
     return PreservedAnalyses::all();
   }
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 8caccf119bdb19..55036dd3e2dcd6 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -1,42 +1,10 @@
-//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//=======--- GCNDPPCombine.h - optimization for DPP instructions ---==========//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
-// operand. If any of the use instruction cannot be combined with the mov the
-// whole sequence is reverted.
-//
-// $old = ...
-// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
-//                            dpp_controls..., $row_mask, $bank_mask,
-//                            $bound_ctrl
-// $res = VALU $dpp_value [, src1]
-//
-// to
-//
-// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
-//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
-//
-// Combining rules :
-//
-// if $row_mask and $bank_mask are fully enabled (0xF) and
-//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
-// -> $combined_old = undef,
-//    $combined_bound_ctrl = DPP_BOUND_ZERO
-//
-// if the VALU op is binary and
-//    $bound_ctrl==DPP_BOUND_OFF and
-//    $old==identity value (immediate) for the VALU op
-// -> $combined_old = src1,
-//    $combined_bound_ctrl = DPP_BOUND_OFF
-//
-// Otherwise cancel.
-//
-// The mov_dpp instruction should reside in the same BB as all its uses
-//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
 #define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
@@ -48,6 +16,11 @@ class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
 public:
   PreservedAnalyses run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MAM);
+
+  MachineFunctionProperties getRequiredProperties() {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index d16d45eef1e897..9a6a54bbc4e497 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
 
 ---
 # GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 43355dc694dc62..1151bde02ef62c 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 324c0037511e1c..b1e23808e91a9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,4 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 3c1b3c95513169..123893674ff5e9 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
 
 ---
 

>From eecdcd6a74ad3a1e556a59c5b0f5120a7c1bc24b Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 29 Aug 2024 06:38:11 +0000
Subject: [PATCH 5/5] Add MFPropsModifier and apply suggestions

---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 13 +++++++++----
 llvm/lib/Target/AMDGPU/GCNDPPCombine.h   |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index eb6186eaf55ef5..3e1a79062ff0cc 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -754,9 +754,9 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
 }
 
 bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction())) {
+  if (skipFunction(MF.getFunction()))
     return false;
-  }
+
   return GCNDPPCombine().run(MF);
 }
 
@@ -795,10 +795,15 @@ bool GCNDPPCombine::run(MachineFunction &MF) {
 
 PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
                                          MachineFunctionAnalysisManager &) {
+  if (MF.getFunction().hasOptNone())
+    return PreservedAnalyses::all();
+
+  MFPropsModifier _(*this, MF);
+
   bool Changed = GCNDPPCombine().run(MF);
-  if (!Changed) {
+  if (!Changed)
     return PreservedAnalyses::all();
-  }
+
   auto PA = getMachineFunctionPassPreservedAnalyses();
   PA.preserveSet<CFGAnalyses>();
   return PA;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 55036dd3e2dcd6..8f119054e6c0b0 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -25,4 +25,4 @@ class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H