[PATCH] ARM: handle some double constants directly
Tim Northover
t.p.northover at gmail.com
Mon Aug 19 09:26:01 PDT 2013
At the moment we resort to a litpool load for virtually all 64-bit floating-point constants, but quite a few common ones can actually be created with a "vmov" of one type or another.
This patch expands the code we already have handling 32-bit floats to deal with 64-bit ones as well. It also enables it in a few more situations (e.g. when there's no NEON) and adds some more tests.
http://llvm-reviews.chandlerc.com/D1439
Files:
lib/Target/ARM/ARMISelLowering.cpp
test/CodeGen/ARM/constantfp.ll
test/CodeGen/ARM/reg_sequence.ll
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -452,6 +452,7 @@
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -4271,46 +4272,75 @@
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+ // The rest of our options are NEON only, make sure that's allowed before
+ // proceeding..
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
DAG.getConstant(0, MVT::i32));
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
Index: test/CodeGen/ARM/constantfp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/constantfp.ll
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s
+; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
+
+define arm_aapcs_vfpcc float @test_vmov_f32() {
+; CHECK-LABEL: test_vmov_f32:
+; CHECK: vmov.f32 d0, #1.0
+
+; CHECK-NONEONFP: vmov.f32 s0, #1.0
+ ret float 1.0
+}
+
+define arm_aapcs_vfpcc float @test_vmov_imm() {
+; CHECK-LABEL: test_vmov_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_imm:
+; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+ ret float 0.0
+}
+
+define arm_aapcs_vfpcc float @test_vmvn_imm() {
+; CHECK-LABEL: test_vmvn_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_imm:
+; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+ ret float 8589934080.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_f64() {
+; CHECK-LABEL: test_vmov_f64:
+; CHECK: vmov.f64 d0, #1.0
+
+; CHECK-NONEON-LABEL: test_vmov_f64:
+; CHECK_NONEON: vmov.f64 d0, #1.0
+
+ ret double 1.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_double_imm() {
+; CHECK-LABEL: test_vmov_double_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+ ret double 0.0
+}
+
+define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
+; CHECK-LABEL: test_vmvn_double_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+ ret double 0x4fffffff4fffffff
+}
+
+; Make sure we don't ignore the high half of 64-bit values when deciding whether
+; a vmov/vmvn is possible.
+define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
+; CHECK-LABEL: test_notvmvn_double_imm:
+; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+
+; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
+; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+ ret double 0x4fffffffffffffff
+}
Index: test/CodeGen/ARM/reg_sequence.ll
===================================================================
--- test/CodeGen/ARM/reg_sequence.ll
+++ test/CodeGen/ARM/reg_sequence.ll
@@ -239,10 +239,9 @@
; PR7157
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK-LABEL: t9:
-; CHECK: vldr
-; CHECK-NOT: vmov d{{.*}}, d16
-; CHECK: vmov.i32 d17
+; CHECK: vmov.i32 d16, #0x0
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1439.1.patch
Type: text/x-patch
Size: 7031 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130819/b15e903c/attachment.bin>
More information about the llvm-commits
mailing list