[llvm] 518292d - [PowerPC] Add the MacroFusion support for Power8
QingShan Zhang via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 22:17:52 PDT 2020
Author: QingShan Zhang
Date: 2020-03-12T05:15:41Z
New Revision: 518292dbdfceb496361b1c92e732e2ccf2a55548
URL: https://github.com/llvm/llvm-project/commit/518292dbdfceb496361b1c92e732e2ccf2a55548
DIFF: https://github.com/llvm/llvm-project/commit/518292dbdfceb496361b1c92e732e2ccf2a55548.diff
LOG: [PowerPC] Add the MacroFusion support for Power8
This patch is intended to implement the missing P8 MacroFusion for LLVM
according to Power8 User's Manual Section 10.1.12 Instruction Fusion
Differential Revision: https://reviews.llvm.org/D70651
Added:
llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
llvm/lib/Target/PowerPC/PPCMacroFusion.def
llvm/lib/Target/PowerPC/PPCMacroFusion.h
llvm/test/CodeGen/PowerPC/macro-fusion.ll
Modified:
llvm/lib/Target/PowerPC/CMakeLists.txt
llvm/lib/Target/PowerPC/PPC.td
llvm/lib/Target/PowerPC/PPCSubtarget.cpp
llvm/lib/Target/PowerPC/PPCSubtarget.h
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 1893d6e32c9a..91021d4e584e 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(PowerPCCodeGen
PPCMCInstLower.cpp
PPCMachineFunctionInfo.cpp
PPCMachineScheduler.cpp
+ PPCMacroFusion.cpp
PPCMIPeephole.cpp
PPCRegisterInfo.cpp
PPCQPXLoadSplat.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index bd6b9dd04181..fc817631e0ac 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -166,6 +166,16 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports instruction fusion">;
+def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
+ "HasAddiLoadFusion", "true",
+ "Power8 Addi-Load fusion",
+ [FeatureFusion]>;
+def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
+ "HasAddisLoadFusion", "true",
+ "Power8 Addis-Load fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -279,7 +289,8 @@ def ProcessorFeatures {
FeatureDirectMove,
FeatureICBT,
FeaturePartwordAtomic];
- list<SubtargetFeature> P8SpecificFeatures = [];
+ list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
+ FeatureAddisLoadFusion];
list<SubtargetFeature> P8InheritableFeatures =
!listconcat(P7InheritableFeatures, P8AdditionalFeatures);
list<SubtargetFeature> P8Features =
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
new file mode 100644
index 000000000000..bde3f5918a23
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -0,0 +1,203 @@
+//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+namespace {
+
+// One macro-fusion feature: its kind, whether it is enabled, and the opcode
+// sets accepted for the first and second instruction of a fusable pair.
+class FusionFeature {
+public:
+ typedef SmallDenseSet<unsigned> FusionOpSet;
+ // One FK_<KIND> per FUSION_FEATURE entry in PPCMacroFusion.def, then FK_END.
+ enum FusionKind {
+ #define FUSION_KIND(KIND) FK_##KIND
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \
+ FUSION_KIND(KIND),
+ #include "PPCMacroFusion.def"
+ FUSION_KIND(END)
+ };
+private:
+ // Fusion kind; instructions of one kind have the same fusion characteristic.
+ FusionKind Kd;
+ // True if this feature is enabled.
+ bool Supported;
+ // Operand index in the second instruction that must be the result (operand
+ // zero) of the first one, e.g. rx in [li rx, si - load rt, ra, rx]. A
+ // negative value means no particular operand is required (see depOpIdx()).
+ int DepOpIdx;
+ // Opcodes accepted for the first instruction of the pair.
+ FusionOpSet OpSet1;
+ // Opcodes accepted for the second instruction of the pair.
+ FusionOpSet OpSet2;
+public:
+ FusionFeature(FusionKind Kind, bool HasFeature, int Index,
+ const FusionOpSet &First, const FusionOpSet &Second) :
+ Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
+ OpSet2(Second) {}
+
+ bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
+ bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+ bool isSupported() const { return Supported; }
+ Optional<unsigned> depOpIdx() const {
+ if (DepOpIdx < 0)
+ return None;
+ return DepOpIdx;
+ }
+
+ FusionKind getKind() const { return Kd; }
+};
+
+// Returns true when operand FirstMIOpIndex of FirstMI and operand
+// SecondMIOpIndex of SecondMI are both register operands holding the same
+// register; a non-register operand on either side never matches.
+static bool matchingRegOps(const MachineInstr &FirstMI, int FirstMIOpIndex,
+                           const MachineInstr &SecondMI, int SecondMIOpIndex) {
+  const MachineOperand &LHS = FirstMI.getOperand(FirstMIOpIndex);
+  const MachineOperand &RHS = SecondMI.getOperand(SecondMIOpIndex);
+  const bool BothAreRegs = LHS.isReg() && RHS.isReg();
+
+  return BothAreRegs && LHS.getReg() == RHS.getReg();
+}
+
+// Return true if the operands of FirstMI and SecondMI meet the constraints of
+// the fusion kind Kd. Operands that are not of the expected register or
+// immediate type are not checked and do not prevent fusion.
+static bool checkOpConstraints(FusionFeature::FusionKind Kd,
+                               const MachineInstr &FirstMI,
+                               const MachineInstr &SecondMI) {
+  switch (Kd) {
+  // The hardware didn't require any specific check for the fused instructions'
+  // operands. Therefore, return true to indicate that, it is fusable.
+  default: return true;
+  // [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
+  case FusionFeature::FK_AddiLoad: {
+    // lxvd2x(ra) cannot be zero
+    const MachineOperand &RA = SecondMI.getOperand(1);
+    if (!RA.isReg())
+      return true;
+
+    return Register::isVirtualRegister(RA.getReg()) ||
+      (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
+  }
+  // [addis rt,ra,si - ld rt,ds(ra)] etc.
+  case FusionFeature::FK_AddisLoad: {
+    const MachineOperand &RT = SecondMI.getOperand(0);
+    if (!RT.isReg())
+      return true;
+
+    // Only check it for non-virtual register.
+    if (!Register::isVirtualRegister(RT.getReg()))
+      // addis(rt) = ld(ra) = ld(rt), and ld(rt) cannot be zero
+      if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
+          (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
+        return false;
+
+    // addis(si) first 12 bits must be all 1s or all 0s. Reject only when they
+    // are neither: the two tests must be joined with '&&', because joined
+    // with '||' the condition holds for every value and nothing would fuse.
+    const MachineOperand &SI = FirstMI.getOperand(2);
+    if (!SI.isImm())
+      return true;
+    const int64_t HighBits = SI.getImm() & 0xFFF0;
+    if (HighBits != 0 && HighBits != 0xFFF0)
+      return false;
+
+    // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+    // 1, then fusion does not occur.
+    if (HighBits == 0xFFF0) {
+      const MachineOperand &D = SecondMI.getOperand(1);
+      if (!D.isImm())
+        return true;
+
+      // 14 bit for DS field, while 16 bit for D field.
+      int MSB = 15;
+      if (SecondMI.getOpcode() == PPC::LD)
+        MSB = 13;
+      return (D.getImm() & (1ULL << MSB)) == 0;
+    }
+    return true;
+  }
+  }
+
+  llvm_unreachable("All the cases should have been handled");
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused together.
+/// When FirstMI is null, only check whether SecondMI may be the second
+/// instruction of any enabled fusion pair. TII is not used.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Lets the .def file name opcodes without the PPC:: prefix.
+ using namespace PPC;
+ // NOTE(review): the table below is `static`, so the ST feature bits are read
+ // only when it is first initialized; confirm that is OK for other subtargets.
+ const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
+ static const FusionFeature FusionFeatures[] = {
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
+ FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
+ { OPSET2 } },
+ #include "PPCMacroFusion.def"
+ };
+ #undef FUSION_KIND
+
+ for (auto &Feature : FusionFeatures) {
+ // Skip if the feature is not supported.
+ if (!Feature.isSupported())
+ continue;
+
+ // Only look for a fusable FirstMI once SecondMI is known to be a valid
+ // second instruction for this feature.
+ if (Feature.hasOp2(SecondMI.getOpcode())) {
+ // If FirstMI == nullptr, that means, we're only checking whether SecondMI
+ // can be fused at all.
+ if (!FirstMI)
+ return true;
+
+ // Checking if the FirstMI is fusable with the SecondMI.
+ if (!Feature.hasOp1(FirstMI->getOpcode()))
+ continue;
+
+ auto DepOpIdx = Feature.depOpIdx();
+ if (DepOpIdx.hasValue()) {
+ // Checking if the result of the FirstMI is the desired operand of the
+ // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
+ if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
+ return false;
+ }
+
+ // Checking more on the instruction operands.
+ if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+/// Creates the macro-fusion DAG mutation driven by shouldScheduleAdjacent.
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(&shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
new file mode 100644
index 000000000000..c7e4e7c22e0a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -0,0 +1,45 @@
+//===---- PPCMacroFusion.def - PowerPC MacroFusion Candidates -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the macro-fusion pair for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef FUSION_FEATURE
+
+// Each FUSION_FEATURE is assigned one TYPE and can be enabled/disabled by
+// HAS_FEATURE. An instruction pair is fusable only when the opcode of the
+// first instruction is in OPSET1 and the opcode of the second instruction is
+// in OPSET2. If DEP_OP_IDX >= 0, we also check that the result of the first
+// op is the operand of the second op at index DEP_OP_IDX. We assume that the
+// result of the first op is its operand zero.
+#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
+
+#endif
+
+#ifndef FUSION_OP_SET
+#define FUSION_OP_SET(...) __VA_ARGS__
+#endif
+
+// Power8 User Manual Section 10.1.12, Instruction Fusion
+// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
+// lvewx, lvx, lxsdx}
+FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
+ FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
+ LVX, LXSDX))
+
+// {addis} followed by one of these {ld, lbz, lhz, lwz}
+FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
+ FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+
+#undef FUSION_FEATURE
+#undef FUSION_OP_SET
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.h b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
new file mode 100644
index 000000000000..91cbedf4558f
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
@@ -0,0 +1,22 @@
+//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC definition of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Returns the PowerPC macro-fusion DAG mutation. Note that it only has an
+/// effect once it is added to a scheduling DAG, e.g.:
+///   DAG->addMutation(createPowerPCMacroFusionDAGMutation());
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();
+} // llvm
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 0549df1ec5cc..b0c0f30a56ec 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -111,6 +111,9 @@ void PPCSubtarget::initializeEnvironment() {
IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
+ HasFusion = false;
+ HasAddiLoadFusion = false;
+ HasAddisLoadFusion = false;
IsISA3_0 = false;
UseLongCalls = false;
SecurePlt = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index a9a417106ae1..be061d9ce0a1 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -135,6 +135,9 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
bool HasDirectMove;
bool HasHTM;
bool HasFloat128;
+ bool HasFusion;
+ bool HasAddiLoadFusion;
+ bool HasAddisLoadFusion;
bool IsISA3_0;
bool UseLongCalls;
bool SecurePlt;
@@ -306,6 +309,9 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
bool useLongCalls() const { return UseLongCalls; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
+ bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 8aa1f1bcaef8..5e5df91fc4ab 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
#include "PPCMachineScheduler.h"
+#include "PPCMacroFusion.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
@@ -275,6 +276,9 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
+
return DAG;
}
@@ -286,6 +290,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
std::make_unique<PPCPostRASchedStrategy>(C) :
std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;
}
diff --git a/llvm/test/CodeGen/PowerPC/macro-fusion.ll b/llvm/test/CodeGen/PowerPC/macro-fusion.ll
new file mode 100644
index 000000000000..0e9ac85a1861
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/macro-fusion.ll
@@ -0,0 +1,21 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-misched -debug-only=machine-scheduler \
+; RUN: -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P8
+
+@m = local_unnamed_addr global i64 0, align 8
+
+define i64 @fuse_addis_ld() {
+entry:
+; CHECK-P8: ********** MI Scheduling **********
+; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
+; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
+; CHECK-P8: SU([[SU0]]): %[[REG3:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @m
+; CHECK-P8: SU([[SU1]]): %{{[0-9]+}}:g8rc = LD target-flags(ppc-toc-lo) @m, %[[REG3]]
+; CHECK-P8: ********** MI Scheduling **********
+; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
+; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
+; CHECK-P8: SU([[SU0]]): renamable $x[[REG3:[0-9]+]] = ADDIStocHA8 $x2, @m
+; CHECK-P8: SU([[SU1]]): renamable $x[[REG3]] = LD target-flags(ppc-toc-lo) @m, renamable $x[[REG3]]
+ %0 = load i64, i64* @m, align 8
+ ret i64 %0
+}
More information about the llvm-commits
mailing list