[llvm-branch-commits] [llvm-branch] r126415 - in /llvm/branches/Apple/palisade: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/fcopysign.ll
Daniel Dunbar
daniel at zuster.org
Thu Feb 24 12:02:23 PST 2011
Author: ddunbar
Date: Thu Feb 24 14:02:23 2011
New Revision: 126415
URL: http://llvm.org/viewvc/llvm-project?rev=126415&view=rev
Log:
Merge r126295:
--
Author: Evan Cheng <evan.cheng at apple.com>
Date: Wed Feb 23 02:24:55 2011 +0000
More fcopysign correctness and performance fixes.
The previous codegen for the slow path (when values are in VFP / NEON
registers) was incorrect if the source is NaN.
The new codegen uses the NEON vbsl instruction to copy the sign bit. e.g.
vmov.i32 d1, #0x80000000
vbsl d1, d2, d0
If NEON is not available, it uses integer instructions to copy the sign bit.
rdar://9034702
Modified:
llvm/branches/Apple/palisade/lib/Target/ARM/ARMISelLowering.cpp
llvm/branches/Apple/palisade/test/CodeGen/ARM/fcopysign.ll
Modified: llvm/branches/Apple/palisade/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/palisade/lib/Target/ARM/ARMISelLowering.cpp?rev=126415&r1=126414&r2=126415&view=diff
==============================================================================
--- llvm/branches/Apple/palisade/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/Apple/palisade/lib/Target/ARM/ARMISelLowering.cpp Thu Feb 24 14:02:23 2011
@@ -2838,8 +2838,51 @@
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- bool F2IisFast = Subtarget->isCortexA9() ||
- Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ }
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (SrcVT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@
&Tmp1, 1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
- // If float to int conversion isn't going to be super expensive, then simply
- // or in the signbit.
- if (F2IisFast) {
- SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
- SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
- Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
- if (VT == MVT::f32) {
- Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
- }
-
- // f64: Or the high part with signbit and then combine two parts.
- Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
- SDValue Lo = Tmp0.getValue(0);
- SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
- Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
- }
-
- // Remove the signbit of operand 0.
- Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
- // If operand 1 signbit is one, then negate operand 0.
- SDValue ARMcc;
- SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
- ISD::SETLT, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
Modified: llvm/branches/Apple/palisade/test/CodeGen/ARM/fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/palisade/test/CodeGen/ARM/fcopysign.ll?rev=126415&r1=126414&r2=126415&view=diff
==============================================================================
--- llvm/branches/Apple/palisade/test/CodeGen/ARM/fcopysign.ll (original)
+++ llvm/branches/Apple/palisade/test/CodeGen/ARM/fcopysign.ll Thu Feb 24 14:02:23 2011
@@ -9,9 +9,8 @@
; SOFT: bfi r0, r1, #31, #1
; HARD: test1:
-; HARD: vabs.f32 d0, d0
-; HARD: cmp r0, #0
-; HARD: vneglt.f32 s0, s0
+; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
+; HARD: vbsl [[REG1]], d2, d0
%0 = tail call float @copysignf(float %x, float %y) nounwind
ret float %0
}
@@ -23,9 +22,9 @@
; SOFT: bfi r1, r2, #31, #1
; HARD: test2:
-; HARD: vabs.f64 d0, d0
-; HARD: cmp r1, #0
-; HARD: vneglt.f64 d0, d0
+; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
+; HARD: vshl.i64 [[REG2]], [[REG2]], #32
+; HARD: vbsl [[REG2]], d1, d0
%0 = tail call double @copysign(double %x, double %y) nounwind
ret double %0
}
@@ -33,9 +32,9 @@
define double @test3(double %x, double %y, double %z) nounwind {
entry:
; SOFT: test3:
-; SOFT: vabs.f64
-; SOFT: cmp {{.*}}, #0
-; SOFT: vneglt.f64
+; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000
+; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
+; SOFT: vbsl [[REG3]],
%0 = fmul double %x, %y
%1 = tail call double @copysign(double %0, double %z) nounwind
ret double %1
More information about the llvm-branch-commits
mailing list