[llvm] r323782 - [AArch64] Add new target feature to fuse address generation with load or store

Tue Jan 30 08:28:01 PST 2018

Author: evandro
Date: Tue Jan 30 08:28:01 2018
New Revision: 323782

URL: http://llvm.org/viewvc/llvm-project?rev=323782&view=rev
Log:
[AArch64] Add new target feature to fuse address generation with load or store

This feature enables the fusion of the address generation and a
corresponding load or store together.

Differential revision: https://reviews.llvm.org/D42393

Added:
    llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Tue Jan 30 08:28:01 2018
@@ -120,6 +120,10 @@ def FeatureArithmeticCbzFusion : Subtarg
     "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
     "CPU fuses arithmetic + cbz/cbnz operations">;
 
+def FeatureFuseAddress : SubtargetFeature<
+    "fuse-address", "HasFuseAddress", "true",
+    "CPU fuses address generation and memory operations">;
+
 def FeatureFuseAES : SubtargetFeature<
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
@@ -346,6 +350,7 @@ def ProcExynosM3 : SubtargetFeature<"exy
                                      FeatureCrypto,
                                      FeatureExynosCheapAsMoveHandling,
                                      FeatureFPARMv8,
+                                     FeatureFuseAddress,
                                      FeatureFuseAES,
                                      FeatureFuseLiterals,
                                      FeatureLSLFast,

Modified: llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp Tue Jan 30 08:28:01 2018
@@ -150,6 +150,39 @@ static bool shouldScheduleAdjacent(const
               SecondMI.getOperand(3).getImm() == 48);
     }
 
+  if (ST.hasFuseAddress()) {
+    // Fuse address generation and loads and stores.
+    if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+         FirstOpcode == AArch64::ADR ||
+         FirstOpcode == AArch64::ADRP) &&
+        ((SecondOpcode == AArch64::STRBBui ||
+          SecondOpcode == AArch64::STRBui ||
+          SecondOpcode == AArch64::STRDui ||
+          SecondOpcode == AArch64::STRHHui ||
+          SecondOpcode == AArch64::STRHui ||
+          SecondOpcode == AArch64::STRQui ||
+          SecondOpcode == AArch64::STRSui ||
+          SecondOpcode == AArch64::STRWui ||
+          SecondOpcode == AArch64::STRXui ||
+          SecondOpcode == AArch64::LDRBBui ||
+          SecondOpcode == AArch64::LDRBui ||
+          SecondOpcode == AArch64::LDRDui ||
+          SecondOpcode == AArch64::LDRHHui ||
+          SecondOpcode == AArch64::LDRHui ||
+          SecondOpcode == AArch64::LDRQui ||
+          SecondOpcode == AArch64::LDRSBWui ||
+          SecondOpcode == AArch64::LDRSBXui ||
+          SecondOpcode == AArch64::LDRSHWui ||
+          SecondOpcode == AArch64::LDRSHXui ||
+          SecondOpcode == AArch64::LDRSWui ||
+          SecondOpcode == AArch64::LDRSui ||
+          SecondOpcode == AArch64::LDRWui ||
+          SecondOpcode == AArch64::LDRXui) &&
+         (FirstOpcode != AArch64::ADR ||
+          SecondMI.getOperand(2).getImm() == 0)))
+      return true;
+  }
+
   return false;
 }
 

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=323782&r1=323781&r2=323782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Tue Jan 30 08:28:01 2018
@@ -111,6 +111,7 @@ protected:
   bool UseAlternateSExtLoadCVTF32Pattern = false;
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
+  bool HasFuseAddress = false;
   bool HasFuseAES = false;
   bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
@@ -236,6 +237,7 @@ public:
   }
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+  bool hasFuseAddress() const { return HasFuseAddress; }
   bool hasFuseAES() const { return HasFuseAES; }
   bool hasFuseLiterals() const { return HasFuseLiterals; }
 

Added: llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll?rev=323782&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion-addr.ll Tue Jan 30 08:28:01 2018
@@ -0,0 +1,112 @@
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3     | FileCheck %s
+
+target triple = "aarch64-unknown"
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+@var_128bit = global i128 0
+@var_half = global half 0.0
+@var_float = global float 0.0
+@var_double = global double 0.0
+@var_double2 = global <2 x double> <double 0.0, double 0.0>
+
+define void @ldst_8bit() {
+  %val8 = load volatile i8, i8* @var_8bit
+  %ext = zext i8 %val8 to i64
+  %add = add i64 %ext, 1
+  %val16 = trunc i64 %add to i16
+  store volatile i16 %val16, i16* @var_16bit
+  ret void
+
+; CHECK-LABEL: ldst_8bit:
+; CHECK: adrp [[RB:x[0-9]+]], var_8bit
+; CHECK-NEXT: ldrb {{w[0-9]+}}, {{\[}}[[RB]], {{#?}}:lo12:var_8bit{{\]}}
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: strh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+}
+
+define void @ldst_16bit() {
+  %val16 = load volatile i16, i16* @var_16bit
+  %ext = zext i16 %val16 to i64
+  %add = add i64 %ext, 1
+  %val32 = trunc i64 %add to i32
+  store volatile i32 %val32, i32* @var_32bit
+  ret void
+
+; CHECK-LABEL: ldst_16bit:
+; CHECK: adrp [[RH:x[0-9]+]], var_16bit
+; CHECK-NEXT: ldrh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: str {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+}
+
+define void @ldst_32bit() {
+  %val32 = load volatile i32, i32* @var_32bit
+  %ext = zext i32 %val32 to i64
+  %val64 = add i64 %ext, 1
+  store volatile i64 %val64, i64* @var_64bit
+  ret void
+
+; CHECK-LABEL: ldst_32bit:
+; CHECK: adrp [[RW:x[0-9]+]], var_32bit
+; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+}
+
+define void @ldst_64bit() {
+  %val64 = load volatile i64, i64* @var_64bit
+  %ext = zext i64 %val64 to i128
+  %val128 = add i128 %ext, 1
+  store volatile i128 %val128, i128* @var_128bit
+  ret void
+
+; CHECK-LABEL: ldst_64bit:
+; CHECK: adrp [[RL:x[0-9]+]], var_64bit
+; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+; CHECK: adrp [[RQ:x[0-9]+]], var_128bit
+; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit
+}
+
+define void @ldst_half() {
+  %valh = load volatile half, half* @var_half
+  %valf = fpext half %valh to float
+  store volatile float %valf, float* @var_float
+  ret void
+
+; CHECK-LABEL: ldst_half:
+; CHECK: adrp [[RH:x[0-9]+]], var_half
+; CHECK-NEXT: ldr {{h[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_half{{\]}}
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: str {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+}
+
+define void @ldst_float() {
+  %valf = load volatile float, float* @var_float
+  %vald = fpext float %valf to double
+  store volatile double %vald, double* @var_double
+  ret void
+
+; CHECK-LABEL: ldst_float:
+; CHECK: adrp [[RF:x[0-9]+]], var_float
+; CHECK-NEXT: ldr {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: str {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+}
+
+define void @ldst_double() {
+  %vald = load volatile double, double* @var_double
+  %val = insertelement <2 x double> undef, double %vald, i32 0
+  %vald2 = insertelement <2 x double> %val, double %vald, i32 1
+  store volatile <2 x double> %vald2, <2 x double>* @var_double2
+  ret void
+
+; CHECK-LABEL: ldst_double:
+; CHECK: adrp [[RD:x[0-9]+]], var_double
+; CHECK-NEXT: add {{x[0-9]+}}, [[RD]], {{#?}}:lo12:var_double
+; CHECK: adrp [[RQ:x[0-9]+]], var_double2
+; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+}