[llvm] r283243 - AArch64: Macrofusion: Split features, add missing combinations.
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 4 12:28:21 PDT 2016
Author: matze
Date: Tue Oct 4 14:28:21 2016
New Revision: 283243
URL: http://llvm.org/viewvc/llvm-project?rev=283243&view=rev
Log:
AArch64: Macrofusion: Split features, add missing combinations.
AArch64InstrInfo::shouldScheduleAdjacent() determines whether two
instructions can benefit from macro-op fusion on Apple CPUs. The list
turned out to be incomplete:
- the "rr" variants of the instructions were missing
- even the "rs" variants can have shift value == 0 and behave like the
"rr" variants
This also splits the MacroOpFusion target feature into
ArithmeticBccFusion and ArithmeticCbzFusion.
Differential Revision: https://reviews.llvm.org/D25142
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
llvm/trunk/test/CodeGen/AArch64/misched-fusion.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=283243&r1=283242&r2=283243&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Tue Oct 4 14:28:21 2016
@@ -94,9 +94,13 @@ def FeatureAlternateSExtLoadCVTF32Patter
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
-def FeatureMacroOpFusion : SubtargetFeature<
- "macroop-fusion", "HasMacroOpFusion", "true",
- "CPU supports macro op fusion">;
+def FeatureArithmeticBccFusion : SubtargetFeature<
+ "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+ "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+ "CPU fuses arithmetic + cbz/cbnz operations">;
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
@@ -204,7 +208,8 @@ def ProcCyclone : SubtargetFeature<"cycl
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureNEON,
FeaturePerfMon,
FeatureSlowMisaligned128Store,
@@ -244,7 +249,7 @@ def ProcVulcan : SubtargetFeature<"vulc
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=283243&r1=283242&r2=283243&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Tue Oct 4 14:28:21 2016
@@ -1876,39 +1876,80 @@ bool AArch64InstrInfo::shouldClusterMemO
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
MachineInstr &Second) const {
- if (Subtarget.hasMacroOpFusion()) {
+ if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First.getOpcode()) {
default:
return false;
- case AArch64::SUBSWri:
case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
+ case AArch64::ADDSWrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
+ }
+ if (Subtarget.hasArithmeticCbzFusion()) {
// Fuse ALU operations followed by CBZ/CBNZ.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
+ case AArch64::ADDWrr:
case AArch64::ADDXri:
+ case AArch64::ADDXrr:
case AArch64::ANDWri:
+ case AArch64::ANDWrr:
case AArch64::ANDXri:
+ case AArch64::ANDXrr:
case AArch64::EORWri:
+ case AArch64::EORWrr:
case AArch64::EORXri:
+ case AArch64::EORXrr:
case AArch64::ORRWri:
+ case AArch64::ORRWrr:
case AArch64::ORRXri:
+ case AArch64::ORRXrr:
case AArch64::SUBWri:
+ case AArch64::SUBWrr:
case AArch64::SUBXri:
+ case AArch64::SUBXrr:
return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=283243&r1=283242&r2=283243&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Tue Oct 4 14:28:21 2016
@@ -80,7 +80,8 @@ protected:
bool Misaligned128StoreIsSlow = false;
bool AvoidQuadLdStPairs = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
- bool HasMacroOpFusion = false;
+ bool HasArithmeticBccFusion = false;
+ bool HasArithmeticCbzFusion = false;
bool DisableLatencySchedHeuristic = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
@@ -188,7 +189,8 @@ public:
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
- bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+ bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
+ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
Modified: llvm/trunk/test/CodeGen/AArch64/misched-fusion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-fusion.ll?rev=283243&r1=283242&r2=283243&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion.ll Tue Oct 4 14:28:21 2016
@@ -1,4 +1,4 @@
-; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s
+; RUN: llc -o - %s -mattr=+arith-cbz-fusion,+use-postra-scheduler | FileCheck %s
; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
target triple = "arm64-apple-ios"
More information about the llvm-commits
mailing list