[llvm] r205309 - ARM: add cyclone CPU with ZeroCycleZeroing feature.
Tim Northover
tnorthover at apple.com
Tue Apr 1 06:22:02 PDT 2014
Author: tnorthover
Date: Tue Apr 1 08:22:02 2014
New Revision: 205309
URL: http://llvm.org/viewvc/llvm-project?rev=205309&view=rev
Log:
ARM: add cyclone CPU with ZeroCycleZeroing feature.
The Cyclone CPU is similar to swift for most LLVM purposes, but does have two
preferred instructions for zeroing a VFP register. This teaches LLVM about
them.
Added:
llvm/trunk/test/CodeGen/ARM/zero-cycle-zero.ll
Modified:
llvm/trunk/lib/Target/ARM/ARM.td
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
llvm/trunk/lib/Target/ARM/ARMSubtarget.h
Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=205309&r1=205308&r2=205309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Tue Apr 1 08:22:02 2014
@@ -73,6 +73,11 @@ def FeatureCrypto : SubtargetFeature<"cr
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable support for CRC instructions">;
+// Cyclone has preferred instructions for zeroing VFP registers, which can
+// execute in 0 cycles.
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
@@ -361,6 +366,13 @@ def : ProcessorModel<"cortex-a15", Cor
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: krait has currently the same Schedule model as A9
+def : ProcessorModel<"krait", CortexA9Model,
+ [ProcKrait, HasV7Ops,
+ FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
+
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
@@ -395,12 +407,12 @@ def : ProcNoItin<"cortex-a57", [Pro
FeatureDB, FeatureFPARMv8,
FeatureNEON, FeatureDSPThumb2]>;
-// FIXME: krait has currently the same Schedule model as A9
-def : ProcessorModel<"krait", CortexA9Model,
- [ProcKrait, HasV7Ops,
- FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+// Cyclone is very similar to swift
+def : ProcessorModel<"cyclone", SwiftModel,
+ [ProcSwift, HasV8Ops, HasV7Ops,
+ FeatureCrypto, FeatureFPARMv8,
+ FeatureDB,FeatureDSPThumb2,
+ FeatureHasRAS, FeatureZCZeroing]>;
//===----------------------------------------------------------------------===//
// Register File Description
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=205309&r1=205308&r2=205309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue Apr 1 08:22:02 2014
@@ -244,6 +244,7 @@ def HasMP : Predicate<"Subtar
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
AssemblerPredicate<"FeatureTrustZone",
"TrustZone">;
+def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=205309&r1=205308&r2=205309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Apr 1 08:22:02 2014
@@ -5245,6 +5245,26 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b111
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
+
+// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
+// require zero cycles to execute so they should be used wherever possible for
+// setting a register to zero.
+
+// Even without these pseudo-insts we would probably end up with the correct
+// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
+// since they are sometimes rather expensive (in general).
+
+let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+ def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
+ [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
+ (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
+ Requires<[HasZCZ]>;
+ def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
+ [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
+ (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
+ Requires<[HasZCZ]>;
+}
+
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=205309&r1=205308&r2=205309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Tue Apr 1 08:22:02 2014
@@ -134,6 +134,7 @@ void ARMSubtarget::initializeEnvironment
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
+ HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=205309&r1=205308&r2=205309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Tue Apr 1 08:22:02 2014
@@ -177,6 +177,10 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
+ /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+ /// particularly effective at zeroing a VFP register.
+ bool HasZeroCycleZeroing;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -298,6 +302,7 @@ public:
bool isFPOnlySP() const { return FPOnlySP; }
bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
+ bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
Added: llvm/trunk/test/CodeGen/ARM/zero-cycle-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/zero-cycle-zero.ll?rev=205309&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/zero-cycle-zero.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/zero-cycle-zero.ll Tue Apr 1 08:22:02 2014
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE
+; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT
+
+declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>)
+
+define void @test_vec64() {
+; CHECK-CYCLONE-LABEL: test_vec64:
+; CHECK-SWIFT-LABEL: test_vec64:
+
+ call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
+ call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
+; CHECK-CYCLONE-NOT: vmov.f64 d0,
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>)
+
+define void @test_vec128() {
+; CHECK-CYCLONE-LABEL: test_vec128:
+; CHECK-SWIFT-LABEL: test_vec128:
+
+ call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+ ret void
+}
+
+declare void @take_i32(i32)
+
+define void @test_i32() {
+; CHECK-CYCLONE-LABEL: test_i32:
+; CHECK-SWIFT-LABEL: test_i32:
+
+ call arm_aapcs_vfpcc void @take_i32(i32 0)
+ call arm_aapcs_vfpcc void @take_i32(i32 0)
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+
+; It doesn't particularly matter what Swift does here, there isn't carefully
+; crafted behaviour that we might break in Cyclone.
+
+ ret void
+}
More information about the llvm-commits
mailing list