[llvm] r323782 - [AArch64] Add new target feature to fuse address generation with load or store
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 30 08:28:01 PST 2018
Author: evandro
Date: Tue Jan 30 08:28:01 2018
New Revision: 323782
URL: http://llvm.org/viewvc/llvm-project?rev=323782&view=rev
Log:
[AArch64] Add new target feature to fuse address generation with load or store
This feature enables the fusion of address generation with the
corresponding load or store.
Differential revision: https://reviews.llvm.org/D42393
Added:
llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Tue Jan 30 08:28:01 2018
@@ -120,6 +120,10 @@ def FeatureArithmeticCbzFusion : Subtarg
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureFuseAddress : SubtargetFeature<
+ "fuse-address", "HasFuseAddress", "true",
+ "CPU fuses address generation and memory operations">;
+
def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
@@ -346,6 +350,7 @@ def ProcExynosM3 : SubtargetFeature<"exy
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseLiterals,
FeatureLSLFast,
Modified: llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp Tue Jan 30 08:28:01 2018
@@ -150,6 +150,39 @@ static bool shouldScheduleAdjacent(const
SecondMI.getOperand(3).getImm() == 48);
}
+ if (ST.hasFuseAddress()) {
+ // Fuse address generation and loads and stores.
+ if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+ FirstOpcode == AArch64::ADR ||
+ FirstOpcode == AArch64::ADRP) &&
+ ((SecondOpcode == AArch64::STRBBui ||
+ SecondOpcode == AArch64::STRBui ||
+ SecondOpcode == AArch64::STRDui ||
+ SecondOpcode == AArch64::STRHHui ||
+ SecondOpcode == AArch64::STRHui ||
+ SecondOpcode == AArch64::STRQui ||
+ SecondOpcode == AArch64::STRSui ||
+ SecondOpcode == AArch64::STRWui ||
+ SecondOpcode == AArch64::STRXui ||
+ SecondOpcode == AArch64::LDRBBui ||
+ SecondOpcode == AArch64::LDRBui ||
+ SecondOpcode == AArch64::LDRDui ||
+ SecondOpcode == AArch64::LDRHHui ||
+ SecondOpcode == AArch64::LDRHui ||
+ SecondOpcode == AArch64::LDRQui ||
+ SecondOpcode == AArch64::LDRSBWui ||
+ SecondOpcode == AArch64::LDRSBXui ||
+ SecondOpcode == AArch64::LDRSHWui ||
+ SecondOpcode == AArch64::LDRSHXui ||
+ SecondOpcode == AArch64::LDRSWui ||
+ SecondOpcode == AArch64::LDRSui ||
+ SecondOpcode == AArch64::LDRWui ||
+ SecondOpcode == AArch64::LDRXui) &&
+ (FirstOpcode != AArch64::ADR ||
+ SecondMI.getOperand(2).getImm() == 0)))
+ return true;
+ }
+
return false;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Tue Jan 30 08:28:01 2018
@@ -111,6 +111,7 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
@@ -236,6 +237,7 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
Added: llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll?rev=323782&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll Tue Jan 30 08:28:01 2018
@@ -0,0 +1,112 @@
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
+
+target triple = "aarch64-unknown"
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+@var_128bit = global i128 0
+@var_half = global half 0.0
+@var_float = global float 0.0
+@var_double = global double 0.0
+@var_double2 = global <2 x double> <double 0.0, double 0.0>
+
+define void @ldst_8bit() {
+ %val8 = load volatile i8, i8* @var_8bit
+ %ext = zext i8 %val8 to i64
+ %add = add i64 %ext, 1
+ %val16 = trunc i64 %add to i16
+ store volatile i16 %val16, i16* @var_16bit
+ ret void
+
+; CHECK-LABEL: ldst_8bit:
+; CHECK: adrp [[RB:x[0-9]+]], var_8bit
+; CHECK-NEXT: ldrb {{w[0-9]+}}, {{\[}}[[RB]], {{#?}}:lo12:var_8bit{{\]}}
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: strh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+}
+
+define void @ldst_16bit() {
+ %val16 = load volatile i16, i16* @var_16bit
+ %ext = zext i16 %val16 to i64
+ %add = add i64 %ext, 1
+ %val32 = trunc i64 %add to i32
+ store volatile i32 %val32, i32* @var_32bit
+ ret void
+
+; CHECK-LABEL: ldst_16bit:
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: ldrh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: str {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+}
+
+define void @ldst_32bit() {
+ %val32 = load volatile i32, i32* @var_32bit
+ %ext = zext i32 %val32 to i64
+ %val64 = add i64 %ext, 1
+ store volatile i64 %val64, i64* @var_64bit
+ ret void
+
+; CHECK-LABEL: ldst_32bit:
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+}
+
+define void @ldst_64bit() {
+ %val64 = load volatile i64, i64* @var_64bit
+ %ext = zext i64 %val64 to i128
+ %val128 = add i128 %ext, 1
+ store volatile i128 %val128, i128* @var_128bit
+ ret void
+
+; CHECK-LABEL: ldst_64bit:
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+; CHECK: adrp [[RQ:x[0-9]+]], var_128bit
+; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit
+}
+
+define void @ldst_half() {
+ %valh = load volatile half, half* @var_half
+ %valf = fpext half %valh to float
+ store volatile float %valf, float* @var_float
+ ret void
+
+; CHECK-LABEL: ldst_half:
+; CHECK: adrp [[RH:x[0-9]+]], var_half
+; CHECK-NEXT: ldr {{h[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_half{{\]}}
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: str {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+}
+
+define void @ldst_float() {
+ %valf = load volatile float, float* @var_float
+ %vald = fpext float %valf to double
+ store volatile double %vald, double* @var_double
+ ret void
+
+; CHECK-LABEL: ldst_float:
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: ldr {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: str {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+}
+
+define void @ldst_double() {
+ %vald = load volatile double, double* @var_double
+ %val = insertelement <2 x double> undef, double %vald, i32 0
+ %vald2 = insertelement <2 x double> %val, double %vald, i32 1
+ store volatile <2 x double> %vald2, <2 x double>* @var_double2
+ ret void
+
+; CHECK-LABEL: ldst_double:
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: add {{x[0-9]+}}, [[RD]], {{#?}}:lo12:var_double
+; CHECK: adrp [[RQ:x[0-9]+]], var_double2
+; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+}
More information about the llvm-commits
mailing list