[llvm-branch-commits] [llvm] 815dd4b - [AArch64] Add Cortex CPU subtarget features for instruction fusion.
Sjoerd Meijer via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 25 01:16:37 PST 2021
Author: Sjoerd Meijer
Date: 2021-01-25T09:11:29Z
New Revision: 815dd4b2920887741f905c5922e5bbf935348cce
URL: https://github.com/llvm/llvm-project/commit/815dd4b2920887741f905c5922e5bbf935348cce
DIFF: https://github.com/llvm/llvm-project/commit/815dd4b2920887741f905c5922e5bbf935348cce.diff
LOG: [AArch64] Add Cortex CPU subtarget features for instruction fusion.
This adds a new cmp-bcc-fusion subtarget feature and enables address, AES,
literal, and compare-and-branch (cmp+bcc) instruction fusion on various Cortex CPUs.
Patch by: Cassie Jones.
Differential Revision: https://reviews.llvm.org/D94457
Added:
Modified:
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 15c7130b24f3..762855207d2b 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -218,6 +218,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureCmpBccFusion : SubtargetFeature<
+ "cmp-bcc-fusion", "HasCmpBccFusion", "true",
+ "CPU fuses cmp+bcc operations">;
+
def FeatureFuseAddress : SubtargetFeature<
"fuse-address", "HasFuseAddress", "true",
"CPU fuses address generation and memory operations">;
@@ -615,6 +619,9 @@ def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
FeatureDotProd,
FeatureFPARMv8,
FeatureFullFP16,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeatureRAS,
FeatureRCPC,
@@ -627,6 +634,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon
]>;
@@ -658,6 +666,7 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
HasV8_2aOps,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeatureRCPC,
FeatureCrypto,
@@ -669,7 +678,9 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON, FeatureRCPC,
FeatureCrypto,
FeatureFullFP16,
@@ -680,6 +691,7 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
"CortexA78",
"Cortex-A78 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
@@ -696,6 +708,7 @@ def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
"CortexA78C",
"Cortex-A78C ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureDotProd,
FeatureFlagM,
@@ -723,6 +736,7 @@ def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
HasV8_2aOps,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 9a2103579a6a..f3b8ef16d6f9 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -21,7 +21,7 @@ namespace {
/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
+ const MachineInstr &SecondMI, bool CmpOnly) {
if (SecondMI.getOpcode() != AArch64::Bcc)
return false;
@@ -29,6 +29,13 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
if (FirstMI == nullptr)
return true;
+ // If we're in CmpOnly mode, we only fuse arithmetic instructions that
+ // discard their result.
+ if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR ||
+ FirstMI->getOperand(0).getReg() == AArch64::WZR)) {
+ return false;
+ }
+
switch (FirstMI->getOpcode()) {
case AArch64::ADDSWri:
case AArch64::ADDSWrr:
@@ -380,8 +387,11 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
// All checking functions assume that the 1st instr is a wildcard if it is
// unspecified.
- if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI))
- return true;
+ if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) {
+ bool CmpOnly = !ST.hasArithmeticBccFusion();
+ if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly))
+ return true;
+ }
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b34911eafdd2..8fe2f125982f 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -221,6 +221,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasCmpBccFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseArithmeticLogic = false;
@@ -377,6 +378,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasCmpBccFusion() const { return HasCmpBccFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
index 694c8a0d0765..489cc849b908 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
@@ -1,4 +1,5 @@
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
index 95a419bd7398..ef01326f376c 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
@@ -2,8 +2,14 @@
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c| FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
index 74180d92089b..be67e9f852e1 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
@@ -1,6 +1,8 @@
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
More information about the llvm-branch-commits
mailing list