[llvm] r215646 - optimize vector fneg of bitcasted integer value
Sanjay Patel
spatel at rotateright.com
Thu Aug 14 08:15:29 PDT 2014
Author: spatel
Date: Thu Aug 14 10:15:28 2014
New Revision: 215646
URL: http://llvm.org/viewvc/llvm-project?rev=215646&view=rev
Log:
optimize vector fneg of bitcasted integer value
This patch allows a vector fneg of a bitcasted integer value to be optimized in the same way that we already optimize a scalar fneg. If the integer variable is a constant, we can precompute the result and not require any logic ops.
This patch is very similar to a fabs patch committed at r214892.
Differential Revision: http://reviews.llvm.org/D4852
Removed:
llvm/trunk/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/ARM/fnegs.ll
llvm/trunk/test/CodeGen/X86/vec_fneg.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=215646&r1=215645&r2=215646&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Aug 14 10:15:28 2014
@@ -7321,22 +7321,27 @@ SDValue DAGCombiner::visitFNEG(SDNode *N
&DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
- // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
- // TODO: We can also optimize for vectors here, but we need to make sure
- // that the sign mask is created properly for each vector element.
if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
- !VT.isVector() &&
- N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger()) {
+ N0.getNode()->hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x80... per scalar element
+ // and splat it.
+ SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x80...
+ SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ }
Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ DAG.getConstant(SignMask, IntVT));
AddToWorklist(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- VT, Int);
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
}
Removed: llvm/trunk/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll?rev=215645&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll (removed)
@@ -1,48 +0,0 @@
-; RUN: llc -mcpu=cortex-a8 -mattr=+neon < %s | grep vneg
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "armv7-eabi"
-
-%aaa = type { %fff, %fff }
-%bbb = type { [6 x %ddd] }
-%ccc = type { %eee, %fff }
-%ddd = type { %fff }
-%eee = type { %fff, %fff, %fff, %fff }
-%fff = type { %struct.vec_float4 }
-%struct.vec_float4 = type { <4 x float> }
-
-define linkonce_odr arm_aapcs_vfpcc void @foo(%eee* noalias sret %agg.result, i64 %tfrm.0.0, i64 %tfrm.0.1, i64 %tfrm.0.2, i64 %tfrm.0.3, i64 %tfrm.0.4, i64 %tfrm.0.5, i64 %tfrm.0.6, i64 %tfrm.0.7) nounwind noinline {
-entry:
- %tmp104 = zext i64 %tfrm.0.2 to i512 ; <i512> [#uses=1]
- %tmp105 = shl i512 %tmp104, 128 ; <i512> [#uses=1]
- %tmp118 = zext i64 %tfrm.0.3 to i512 ; <i512> [#uses=1]
- %tmp119 = shl i512 %tmp118, 192 ; <i512> [#uses=1]
- %ins121 = or i512 %tmp119, %tmp105 ; <i512> [#uses=1]
- %tmp99 = zext i64 %tfrm.0.4 to i512 ; <i512> [#uses=1]
- %tmp100 = shl i512 %tmp99, 256 ; <i512> [#uses=1]
- %tmp123 = zext i64 %tfrm.0.5 to i512 ; <i512> [#uses=1]
- %tmp124 = shl i512 %tmp123, 320 ; <i512> [#uses=1]
- %tmp96 = zext i64 %tfrm.0.6 to i512 ; <i512> [#uses=1]
- %tmp97 = shl i512 %tmp96, 384 ; <i512> [#uses=1]
- %tmp128 = zext i64 %tfrm.0.7 to i512 ; <i512> [#uses=1]
- %tmp129 = shl i512 %tmp128, 448 ; <i512> [#uses=1]
- %mask.masked = or i512 %tmp124, %tmp100 ; <i512> [#uses=1]
- %ins131 = or i512 %tmp129, %tmp97 ; <i512> [#uses=1]
- %tmp109132 = zext i64 %tfrm.0.0 to i128 ; <i128> [#uses=1]
- %tmp113134 = zext i64 %tfrm.0.1 to i128 ; <i128> [#uses=1]
- %tmp114133 = shl i128 %tmp113134, 64 ; <i128> [#uses=1]
- %tmp94 = or i128 %tmp114133, %tmp109132 ; <i128> [#uses=1]
- %tmp95 = bitcast i128 %tmp94 to <4 x float> ; <<4 x float>> [#uses=0]
- %tmp82 = lshr i512 %ins121, 128 ; <i512> [#uses=1]
- %tmp83 = trunc i512 %tmp82 to i128 ; <i128> [#uses=1]
- %tmp84 = bitcast i128 %tmp83 to <4 x float> ; <<4 x float>> [#uses=0]
- %tmp86 = lshr i512 %mask.masked, 256 ; <i512> [#uses=1]
- %tmp87 = trunc i512 %tmp86 to i128 ; <i128> [#uses=1]
- %tmp88 = bitcast i128 %tmp87 to <4 x float> ; <<4 x float>> [#uses=0]
- %tmp90 = lshr i512 %ins131, 384 ; <i512> [#uses=1]
- %tmp91 = trunc i512 %tmp90 to i128 ; <i128> [#uses=1]
- %tmp92 = bitcast i128 %tmp91 to <4 x float> ; <<4 x float>> [#uses=1]
- %tmp = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp92 ; <<4 x float>> [#uses=1]
- %tmp28 = getelementptr inbounds %eee* %agg.result, i32 0, i32 3, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
- store <4 x float> %tmp, <4 x float>* %tmp28, align 16
- ret void
-}
Modified: llvm/trunk/test/CodeGen/ARM/fnegs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fnegs.ll?rev=215646&r1=215645&r2=215646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fnegs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fnegs.ll Thu Aug 14 10:15:28 2014
@@ -73,3 +73,49 @@ entry:
; CORTEXA9-LABEL: test2:
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
+; If we're bitcasting an integer to an FP vector, we should avoid the FP/vector unit entirely.
+; Make sure that we're flipping the sign bit and only the sign bit of each float (PR20354).
+; So instead of something like this:
+; vmov d16, r0, r1
+; vneg.f32 d16, d16
+; vmov r0, r1, d16
+;
+; We should generate:
+; eor r0, r0, #-214783648
+; eor r1, r1, #-214783648
+
+define <2 x float> @fneg_bitcast(i64 %i) {
+ %bitcast = bitcast i64 %i to <2 x float>
+ %fneg = fsub <2 x float> <float -0.0, float -0.0>, %bitcast
+ ret <2 x float> %fneg
+}
+; VFP2-LABEL: fneg_bitcast:
+; VFP2-DAG: eor r0, r0, #-2147483648
+; VFP2-DAG: eor r1, r1, #-2147483648
+; VFP2-NOT: vneg.f32
+
+; NFP1-LABEL: fneg_bitcast:
+; NFP1-DAG: eor r0, r0, #-2147483648
+; NFP1-DAG: eor r1, r1, #-2147483648
+; NFP1-NOT: vneg.f32
+
+; NFP0-LABEL: fneg_bitcast:
+; NFP0-DAG: eor r0, r0, #-2147483648
+; NFP0-DAG: eor r1, r1, #-2147483648
+; NFP0-NOT: vneg.f32
+
+; CORTEXA8-LABEL: fneg_bitcast:
+; CORTEXA8-DAG: eor r0, r0, #-2147483648
+; CORTEXA8-DAG: eor r1, r1, #-2147483648
+; CORTEXA8-NOT: vneg.f32
+
+; CORTEXA8U-LABEL: fneg_bitcast:
+; CORTEXA8U-DAG: eor r0, r0, #-2147483648
+; CORTEXA8U-DAG: eor r1, r1, #-2147483648
+; CORTEXA8U-NOT: vneg.f32
+
+; CORTEXA9-LABEL: fneg_bitcast:
+; CORTEXA9-DAG: eor r0, r0, #-2147483648
+; CORTEXA9-DAG: eor r1, r1, #-2147483648
+; CORTEXA9-NOT: vneg.f32
+
Modified: llvm/trunk/test/CodeGen/X86/vec_fneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fneg.ll?rev=215646&r1=215645&r2=215646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fneg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fneg.ll Thu Aug 14 10:15:28 2014
@@ -21,3 +21,25 @@ define <4 x float> @t2(<4 x float> %Q) {
%tmp = fsub <4 x float> zeroinitializer, %Q
ret <4 x float> %tmp
}
+
+; If we're bitcasting an integer to an FP vector, we should avoid the FPU/vector unit entirely.
+; Make sure that we're flipping the sign bit and only the sign bit of each float.
+; So instead of something like this:
+; movd %rdi, %xmm0
+; xorps .LCPI2_0(%rip), %xmm0
+;
+; We should generate:
+; movabsq (put sign bit mask in integer register))
+; xorq (flip sign bits)
+; movd (move to xmm return register)
+
+define <2 x float> @fneg_bitcast(i64 %i) {
+; CHECK-LABEL: fneg_bitcast:
+; CHECK: movabsq $-9223372034707292160, %rax # imm = 0x8000000080000000
+; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: movd %rax, %xmm0
+; CHECK-NEXT: retq
+ %bitcast = bitcast i64 %i to <2 x float>
+ %fneg = fsub <2 x float> <float -0.0, float -0.0>, %bitcast
+ ret <2 x float> %fneg
+}
More information about the llvm-commits
mailing list