[llvm] fdca2c3 - AMDGPU/NewPM Port GCNDPPCombine to NPM (#105816)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 02:19:56 PDT 2024
Author: Akshat Oke
Date: 2024-08-29T14:49:52+05:30
New Revision: fdca2c33a1f33f4886d969ea0f0219764c7b6b59
URL: https://github.com/llvm/llvm-project/commit/fdca2c33a1f33f4886d969ea0f0219764c7b6b59
DIFF: https://github.com/llvm/llvm-project/commit/fdca2c33a1f33f4886d969ea0f0219764c7b6b59.diff
LOG: AMDGPU/NewPM Port GCNDPPCombine to NPM (#105816)
Co-authored-by: Akshat Oke <Akshat.Oke at amd.com>
Added:
llvm/lib/Target/AMDGPU/GCNDPPCombine.h
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/test/CodeGen/AMDGPU/dpp_combine.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6eb641db076958..717e5f511ef2f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
-void initializeGCNDPPCombinePass(PassRegistry &);
-extern char &GCNDPPCombineID;
+void initializeGCNDPPCombineLegacyPass(PassRegistry &);
+extern char &GCNDPPCombineLegacyID;
void initializeSIFoldOperandsLegacyPass(PassRegistry &);
extern char &SIFoldOperandsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 10e394ed03df8f..9976a8199d7047 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,4 +98,5 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
#undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 86cc9d1ecde817..a769bc9e486573 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,6 +28,7 @@
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
@@ -403,7 +404,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
- initializeGCNDPPCombinePass(*PR);
+ initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
@@ -1273,7 +1274,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsLegacyID);
if (EnableDPPCombine)
- addPass(&GCNDPPCombineID);
+ addPass(&GCNDPPCombineLegacyID);
addPass(&SILoadStoreOptimizerID);
if (isPassEnabled(EnableSDWAPeephole)) {
addPass(&SIPeepholeSDWAID);
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 0ac079c69e605f..3e1a79062ff0cc 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -37,6 +37,7 @@
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
+#include "GCNDPPCombine.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
namespace {
-class GCNDPPCombine : public MachineFunctionPass {
+class GCNDPPCombine {
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const GCNSubtarget *ST;
@@ -76,12 +77,18 @@ class GCNDPPCombine : public MachineFunctionPass {
bool combineDPPMov(MachineInstr &MI) const;
+ int getDPPOp(unsigned Op, bool IsShrinkable) const;
+ bool isShrinkable(MachineInstr &MI) const;
+
+public:
+ bool run(MachineFunction &MF);
+};
+
+class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
static char ID;
- GCNDPPCombine() : MachineFunctionPass(ID) {
- initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
- }
+ GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -96,22 +103,19 @@ class GCNDPPCombine : public MachineFunctionPass {
return MachineFunctionProperties()
.set(MachineFunctionProperties::Property::IsSSA);
}
-
-private:
- int getDPPOp(unsigned Op, bool IsShrinkable) const;
- bool isShrinkable(MachineInstr &MI) const;
};
} // end anonymous namespace
-INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
+ false)
-char GCNDPPCombine::ID = 0;
+char GCNDPPCombineLegacy::ID = 0;
-char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;
FunctionPass *llvm::createGCNDPPCombinePass() {
- return new GCNDPPCombine();
+ return new GCNDPPCombineLegacy();
}
bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
@@ -749,9 +753,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return !Rollback;
}
-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ return GCNDPPCombine().run(MF);
+}
+
+bool GCNDPPCombine::run(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
- if (!ST->hasDPP() || skipFunction(MF.getFunction()))
+ if (!ST->hasDPP())
return false;
MRI = &MF.getRegInfo();
@@ -781,3 +792,19 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
}
return Changed;
}
+
+PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ if (MF.getFunction().hasOptNone())
+ return PreservedAnalyses::all();
+
+ MFPropsModifier _(*this, MF);
+
+ bool Changed = GCNDPPCombine().run(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
new file mode 100644
index 00000000000000..8f119054e6c0b0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,28 @@
+//=======--- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MAM);
+
+ MachineFunctionProperties getRequiredProperties() {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a1c3970a5bae90..179d0becf6693a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
---
# old is undefined: only combine when masks are fully enabled and
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 7e286a4dd678eb..b1e23808e91a9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
---
name: test_cvt_f32_bf8_byte0
More information about the llvm-commits mailing list