[llvm] r337575 - [ARM] Add new feature to enable optimizing the VFP registers
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 20 09:49:28 PDT 2018
Author: evandro
Date: Fri Jul 20 09:49:28 2018
New Revision: 337575
URL: http://llvm.org/viewvc/llvm-project?rev=337575&view=rev
Log:
[ARM] Add new feature to enable optimizing the VFP registers
Enable the optimization of operations on DPR and SPR via a feature instead
of checking the target.
Differential revision: https://reviews.llvm.org/D49463
Modified:
llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp
llvm/trunk/lib/Target/ARM/ARM.td
llvm/trunk/lib/Target/ARM/ARMSubtarget.h
llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll
Modified: llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp?rev=337575&r1=337574&r2=337575&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp Fri Jul 20 09:49:28 2018
@@ -660,8 +660,9 @@ bool A15SDOptimizer::runOnMachineFunctio
const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
// Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
// enabled when NEON is available.
- if (!(STI.isCortexA15() && STI.hasNEON()))
+ if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
return false;
+
TII = STI.getInstrInfo();
TRI = STI.getRegisterInfo();
MRI = &Fn.getRegInfo();
Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=337575&r1=337574&r2=337575&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Fri Jul 20 09:49:28 2018
@@ -195,6 +195,13 @@ def FeatureDontWidenVMOVS : SubtargetFea
"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;
+// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
+// VFP register widths.
+def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
+ "SplatVFPToNeon", "true",
+ "Splat register from VFP to NEON",
+ [FeatureDontWidenVMOVS]>;
+
// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
"ExpandMLx", "true",
@@ -819,6 +826,7 @@ def : ProcessorModel<"cortex-a12", Cort
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureDontWidenVMOVS,
+ FeatureSplatVFPToNeon,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
FeatureTrustZone,
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=337575&r1=337574&r2=337575&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Fri Jul 20 09:49:28 2018
@@ -352,9 +352,12 @@ protected:
/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;
- /// If true, VMOVS will never be widened to VMOVD
+ /// If true, VMOVS will never be widened to VMOVD.
bool DontWidenVMOVS = false;
+ /// If true, splat a register between VFP and NEON instructions.
+ bool SplatVFPToNeon = false;
+
/// If true, run the MLx expansion pass.
bool ExpandMLx = false;
@@ -591,6 +594,7 @@ public:
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
bool hasMuxedUnits() const { return HasMuxedUnits; }
bool dontWidenVMOVS() const { return DontWidenVMOVS; }
+ bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
Modified: llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll?rev=337575&r1=337574&r2=337575&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll Fri Jul 20 09:49:28 2018
@@ -1,8 +1,8 @@
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-DISABLED %s
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-ENABLED %s
+; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs -disable-a15-sd-optimization < %s | FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
+; RUN: llc -O1 -mattr=-splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
+; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,CHECK-ENABLED %s
-; CHECK-ENABLED-LABEL: t1:
-; CHECK-DISABLED-LABEL: t1:
+; CHECK-LABEL: t1:
define <2 x float> @t1(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -11,8 +11,7 @@ define <2 x float> @t1(float %f) {
ret <2 x float> %i2
}
-; CHECK-ENABLED-LABEL: t2:
-; CHECK-DISABLED-LABEL: t2:
+; CHECK-LABEL: t2:
define <4 x float> @t2(float %g, float %f) {
; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -21,8 +20,7 @@ define <4 x float> @t2(float %g, float %
ret <4 x float> %i2
}
-; CHECK-ENABLED-LABEL: t3:
-; CHECK-DISABLED-LABEL: t3:
+; CHECK-LABEL: t3:
define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -31,8 +29,7 @@ define arm_aapcs_vfpcc <2 x float> @t3(f
ret <2 x float> %i2
}
-; CHECK-ENABLED-LABEL: t4:
-; CHECK-DISABLED-LABEL: t4:
+; CHECK-LABEL: t4:
define <2 x float> @t4(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup
@@ -45,8 +42,7 @@ b:
ret <2 x float> %i2
}
-; CHECK-ENABLED-LABEL: t5:
-; CHECK-DISABLED-LABEL: t5:
+; CHECK-LABEL: t5:
define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
; CHECK-ENABLED: vadd.f32
@@ -58,8 +54,7 @@ define arm_aapcs_vfpcc <4 x float> @t5(<
}
; Test that DPair can be successfully passed as QPR.
-; CHECK-ENABLED-LABEL: test_DPair1:
-; CHECK-DISABLED-LABEL: test_DPair1:
+; CHECK-LABEL: test_DPair1:
define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
entry:
%0 = insertelement <4 x float> undef, float %x, i32 1
@@ -89,8 +84,7 @@ sw.epilog:
ret void
}
-; CHECK-ENABLED-LABEL: test_DPair2:
-; CHECK-DISABLED-LABEL: test_DPair2:
+; CHECK-LABEL: test_DPair2:
define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
entry:
%0 = insertelement <4 x float> undef, float %x, i32 0
More information about the llvm-commits
mailing list