[llvm] AMDGPU/NewPM Port GCNDPPCombine to NPM (PR #105816)
Akshat Oke via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 23:44:22 PDT 2024
https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/105816
>From 28fee48eea727831b954064e91a20b7bcc41dd1f Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Fri, 23 Aug 2024 11:28:09 +0000
Subject: [PATCH 1/5] AMDGPU/NewPM Port GCNDPPCombine to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5 +-
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 51 ++++++++++++-----
llvm/lib/Target/AMDGPU/GCNDPPCombine.h | 55 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 2 +
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 1 +
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 3 +
.../AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir | 1 +
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 1 +
10 files changed, 106 insertions(+), 18 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/GCNDPPCombine.h
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6eb641db076958..717e5f511ef2f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
-void initializeGCNDPPCombinePass(PassRegistry &);
-extern char &GCNDPPCombineID;
+void initializeGCNDPPCombineLegacyPass(PassRegistry &);
+extern char &GCNDPPCombineLegacyID;
void initializeSIFoldOperandsLegacyPass(PassRegistry &);
extern char &SIFoldOperandsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 10e394ed03df8f..9976a8199d7047 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,4 +98,5 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
#undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 86cc9d1ecde817..a769bc9e486573 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,6 +28,7 @@
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
@@ -403,7 +404,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
- initializeGCNDPPCombinePass(*PR);
+ initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
@@ -1273,7 +1274,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsLegacyID);
if (EnableDPPCombine)
- addPass(&GCNDPPCombineID);
+ addPass(&GCNDPPCombineLegacyID);
addPass(&SILoadStoreOptimizerID);
if (isPassEnabled(EnableSDWAPeephole)) {
addPass(&SIPeepholeSDWAID);
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 0ac079c69e605f..ce67cf077ce1f9 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -37,6 +37,7 @@
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
+#include "GCNDPPCombine.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
namespace {
-class GCNDPPCombine : public MachineFunctionPass {
+class GCNDPPCombine {
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const GCNSubtarget *ST;
@@ -76,12 +77,17 @@ class GCNDPPCombine : public MachineFunctionPass {
bool combineDPPMov(MachineInstr &MI) const;
+ int getDPPOp(unsigned Op, bool IsShrinkable) const;
+ bool isShrinkable(MachineInstr &MI) const;
+
+public:
+ bool run(MachineFunction &MF);
+};
+class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
static char ID;
- GCNDPPCombine() : MachineFunctionPass(ID) {
- initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
- }
+ GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -96,22 +102,19 @@ class GCNDPPCombine : public MachineFunctionPass {
return MachineFunctionProperties()
.set(MachineFunctionProperties::Property::IsSSA);
}
-
-private:
- int getDPPOp(unsigned Op, bool IsShrinkable) const;
- bool isShrinkable(MachineInstr &MI) const;
};
} // end anonymous namespace
-INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
+ false)
-char GCNDPPCombine::ID = 0;
+char GCNDPPCombineLegacy::ID = 0;
-char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;
FunctionPass *llvm::createGCNDPPCombinePass() {
- return new GCNDPPCombine();
+ return new GCNDPPCombineLegacy();
}
bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
@@ -749,9 +752,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return !Rollback;
}
-bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction())) {
+ return false;
+ }
+ GCNDPPCombine Impl;
+ return Impl.run(MF);
+}
+
+bool GCNDPPCombine::run(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
- if (!ST->hasDPP() || skipFunction(MF.getFunction()))
+ if (!ST->hasDPP())
return false;
MRI = &MF.getRegInfo();
@@ -781,3 +792,15 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
}
return Changed;
}
+
+PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ GCNDPPCombine Impl;
+ bool Changed = Impl.run(MF);
+ if (!Changed) {
+ return PreservedAnalyses::all();
+ }
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
new file mode 100644
index 00000000000000..922e5b4de18e15
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -0,0 +1,55 @@
+//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
+// operand. If any of the use instruction cannot be combined with the mov the
+// whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+// dpp_controls..., $row_mask, $bank_mask,
+// $bound_ctrl
+// $res = VALU $dpp_value [, src1]
+//
+// to
+//
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+// dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
+//
+// Combining rules :
+//
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+// $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+// $combined_bound_ctrl = DPP_BOUND_ZERO
+//
+// if the VALU op is binary and
+// $bound_ctrl==DPP_BOUND_OFF and
+// $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+// $combined_bound_ctrl = DPP_BOUND_OFF
+//
+// Otherwise cancel.
+//
+// The mov_dpp instruction should reside in the same BB as all its uses
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MAM);
+};
+
+} // end namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index 9a6a54bbc4e497..ecff9683bac732 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
---
# GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a1c3970a5bae90..179d0becf6693a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
---
# old is undefined: only combine when masks are fully enabled and
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 1151bde02ef62c..3c20fdff9fbf66 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
---
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 7e286a4dd678eb..b1e23808e91a9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
---
name: test_cvt_f32_bf8_byte0
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..a927b836280c2f 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
---
>From 51eb2d1be26864fd65520f002f16e04a200a3828 Mon Sep 17 00:00:00 2001
From: Akshat Oke <76596238+Akshat-Oke at users.noreply.github.com>
Date: Sun, 25 Aug 2024 20:39:16 +0530
Subject: [PATCH 2/5] Replace -run-pass with -passes in all but one test
---
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 2 --
llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 3 ---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir | 1 -
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 1 -
4 files changed, 7 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index ecff9683bac732..d16d45eef1e897 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,6 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
---
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 3c20fdff9fbf66..43355dc694dc62 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,8 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
---
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index b1e23808e91a9b..324c0037511e1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,5 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index a927b836280c2f..3c1b3c95513169 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
---
>From b3354db914787c7b4040b0556962427ae4f3f195 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 26 Aug 2024 05:40:42 +0000
Subject: [PATCH 3/5] newlines and PassInfoMixin
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNDPPCombine.h | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index ce67cf077ce1f9..e167a581bd08ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -803,4 +803,4 @@ PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
return PA;
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 922e5b4de18e15..8caccf119bdb19 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -44,7 +44,7 @@
#include "llvm/CodeGen/MachinePassManager.h"
namespace llvm {
-class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
+class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MAM);
@@ -52,4 +52,4 @@ class GCNDPPCombinePass : public MachinePassInfoMixin<GCNDPPCombinePass> {
} // end namespace llvm
-#endif
\ No newline at end of file
+#endif
>From 59d603d12e30d01e9b8f67e073ab8fe348ff4096 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 28 Aug 2024 09:02:45 +0000
Subject: [PATCH 4/5] Apply suggestions
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 7 ++--
llvm/lib/Target/AMDGPU/GCNDPPCombine.h | 39 +++----------------
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 4 +-
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 6 +--
.../AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir | 1 +
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 2 +-
6 files changed, 16 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index e167a581bd08ea..eb6186eaf55ef5 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -83,6 +83,7 @@ class GCNDPPCombine {
public:
bool run(MachineFunction &MF);
};
+
class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
static char ID;
@@ -756,8 +757,7 @@ bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction())) {
return false;
}
- GCNDPPCombine Impl;
- return Impl.run(MF);
+ return GCNDPPCombine().run(MF);
}
bool GCNDPPCombine::run(MachineFunction &MF) {
@@ -795,8 +795,7 @@ bool GCNDPPCombine::run(MachineFunction &MF) {
PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &) {
- GCNDPPCombine Impl;
- bool Changed = Impl.run(MF);
+ bool Changed = GCNDPPCombine().run(MF);
if (!Changed) {
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 8caccf119bdb19..55036dd3e2dcd6 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -1,42 +1,10 @@
-//=======- GCNDPPCombine.h - optimization for DPP instructions ---==========//
+//=======--- GCNDPPCombine.h - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
-// operand. If any of the use instruction cannot be combined with the mov the
-// whole sequence is reverted.
-//
-// $old = ...
-// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
-// dpp_controls..., $row_mask, $bank_mask,
-// $bound_ctrl
-// $res = VALU $dpp_value [, src1]
-//
-// to
-//
-// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
-// dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
-//
-// Combining rules :
-//
-// if $row_mask and $bank_mask are fully enabled (0xF) and
-// $bound_ctrl==DPP_BOUND_ZERO or $old==0
-// -> $combined_old = undef,
-// $combined_bound_ctrl = DPP_BOUND_ZERO
-//
-// if the VALU op is binary and
-// $bound_ctrl==DPP_BOUND_OFF and
-// $old==identity value (immediate) for the VALU op
-// -> $combined_old = src1,
-// $combined_bound_ctrl = DPP_BOUND_OFF
-//
-// Otherwise cancel.
-//
-// The mov_dpp instruction should reside in the same BB as all its uses
-//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
@@ -48,6 +16,11 @@ class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MAM);
+
+ MachineFunctionProperties getRequiredProperties() {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index d16d45eef1e897..9a6a54bbc4e497 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
-# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
---
# GCN-LABEL: name: dpp64_old_impdef
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 43355dc694dc62..1151bde02ef62c 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
---
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
index 324c0037511e1c..b1e23808e91a9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir
@@ -1,4 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -passes=gcn-dpp-combine %s -o - | FileCheck -check-prefix=GFX12 %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 3c1b3c95513169..123893674ff5e9 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
---
>From eecdcd6a74ad3a1e556a59c5b0f5120a7c1bc24b Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 29 Aug 2024 06:38:11 +0000
Subject: [PATCH 5/5] Add MFPropsModifier and apply suggestions
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 13 +++++++++----
llvm/lib/Target/AMDGPU/GCNDPPCombine.h | 2 +-
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index eb6186eaf55ef5..3e1a79062ff0cc 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -754,9 +754,9 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
}
bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction())) {
+ if (skipFunction(MF.getFunction()))
return false;
- }
+
return GCNDPPCombine().run(MF);
}
@@ -795,10 +795,15 @@ bool GCNDPPCombine::run(MachineFunction &MF) {
PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &) {
+ if (MF.getFunction().hasOptNone())
+ return PreservedAnalyses::all();
+
+ MFPropsModifier _(*this, MF);
+
bool Changed = GCNDPPCombine().run(MF);
- if (!Changed) {
+ if (!Changed)
return PreservedAnalyses::all();
- }
+
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
return PA;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
index 55036dd3e2dcd6..8f119054e6c0b0 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.h
@@ -25,4 +25,4 @@ class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
More information about the llvm-commits mailing list