[llvm] r243924 - [CodeGen] Fix FCOPYSIGN legalization to account for mismatched types.
Ahmed Bougacha
ahmed.bougacha at gmail.com
Mon Aug 3 17:32:55 PDT 2015
Author: ab
Date: Mon Aug 3 19:32:55 2015
New Revision: 243924
URL: http://llvm.org/viewvc/llvm-project?rev=243924&view=rev
Log:
[CodeGen] Fix FCOPYSIGN legalization to account for mismatched types.
We used to legalize it like any other binary operation. It isn't one,
because it accepts mismatched operand types. Because of that, we used
to hit various asserts and miscompiles.
Specialize vector legalizations to unroll in the worst case or, when
possible, to legalize just the operand that needs legalization.
Scalarization isn't covered, because I can't think of a target where
some but not all of the 1-element vector types are to be scalarized.
Added:
llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=243924&r1=243923&r2=243924&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Mon Aug 3 19:32:55 2015
@@ -623,6 +623,7 @@ private:
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
@@ -650,6 +651,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -693,6 +695,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
@@ -710,6 +713,7 @@ private:
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=243924&r1=243923&r2=243924&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Mon Aug 3 19:32:55 2015
@@ -594,6 +594,7 @@ void DAGTypeLegalizer::SplitVectorResult
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
@@ -656,7 +657,6 @@ void DAGTypeLegalizer::SplitVectorResult
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
@@ -872,6 +872,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -1359,6 +1378,7 @@ bool DAGTypeLegalizer::SplitVectorOperan
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1877,6 +1897,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
//===----------------------------------------------------------------------===//
@@ -1939,7 +1964,6 @@ void DAGTypeLegalizer::WidenVectorResult
break;
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
@@ -1952,6 +1976,10 @@ void DAGTypeLegalizer::WidenVectorResult
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -2246,6 +2274,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Co
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -2818,6 +2857,7 @@ bool DAGTypeLegalizer::WidenVectorOperan
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -2914,6 +2954,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXT
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
Added: llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll?rev=243924&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/vector-fcopysign.ll Mon Aug 3 19:32:55 2015
@@ -0,0 +1,243 @@
+; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+;============ v1f32
+
+; WidenVecRes same
+define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+ %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
+ ret <1 x float> %r
+}
+
+; WidenVecRes mismatched
+define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <1 x double> %b to <1 x float>
+ %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
+ ret <1 x float> %r
+}
+
+declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
+
+;============ v1f64
+
+; WidenVecOp #1
+define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: fcvt d1, s1
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fpext <1 x float> %b to <1 x double>
+ %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
+ ret <1 x double> %r
+}
+
+define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %r
+}
+
+declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
+
+;============ v2f32
+
+define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+ %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: fcvt s2, d2
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <2 x double> %b to <2 x float>
+ %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
+ ret <2 x float> %r
+}
+
+declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
+
+;============ v4f32
+
+define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s2, v1[1]
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: mov s5, v0[2]
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: mov s2, v0[3]
+; CHECK-NEXT: mov s6, v1[2]
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: bit.16b v5, v6, v4
+; CHECK-NEXT: mov s1, v1[3]
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ins.s v0[2], v5[0]
+; CHECK-NEXT: bit.16b v2, v1, v4
+; CHECK-NEXT: ins.s v0[3], v2[0]
+; CHECK-NEXT: ret
+ %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %r
+}
+
+; SplitVecOp #1
+define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: mov d4, v1[1]
+; CHECK-NEXT: movi.4s v5, #0x80, lsl #24
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: mov s6, v0[2]
+; CHECK-NEXT: mov s7, v0[3]
+; CHECK-NEXT: fcvt s16, d2
+; CHECK-NEXT: bit.16b v0, v1, v5
+; CHECK-NEXT: bit.16b v6, v16, v5
+; CHECK-NEXT: fcvt s1, d4
+; CHECK-NEXT: bit.16b v3, v1, v5
+; CHECK-NEXT: mov d1, v2[1]
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ins.s v0[2], v6[0]
+; CHECK-NEXT: bit.16b v7, v1, v5
+; CHECK-NEXT: ins.s v0[3], v7[0]
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <4 x double> %b to <4 x float>
+ %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
+ ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
+
+;============ v2f64
+
+define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v232:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v0[1]
+; CHECK-NEXT: mov s3, v1[1]
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fcvt d1, s1
+; CHECK-NEXT: fcvt d3, s3
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v2, v3, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.d v0[1], v2[0]
+; CHECK-NEXT: ret
+ %tmp0 = fpext <2 x float> %b to <2 x double>
+ %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: mov d3, v0[1]
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v3, v2, v4
+; CHECK-NEXT: bit.16b v0, v1, v4
+; CHECK-NEXT: ins.d v0[1], v3[0]
+; CHECK-NEXT: ret
+ %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %r
+}
+
+declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: ext.16b v3, v2, v2, #8
+; CHECK-NEXT: mov d4, v0[1]
+; CHECK-NEXT: mov s5, v2[1]
+; CHECK-NEXT: movi.2d v6, #0000000000000000
+; CHECK-NEXT: fcvt d2, s2
+; CHECK-NEXT: fcvt d5, s5
+; CHECK-NEXT: fneg.2d v6, v6
+; CHECK-NEXT: bit.16b v4, v5, v6
+; CHECK-NEXT: mov d5, v1[1]
+; CHECK-NEXT: bit.16b v0, v2, v6
+; CHECK-NEXT: mov s2, v3[1]
+; CHECK-NEXT: fcvt d3, s3
+; CHECK-NEXT: fcvt d2, s2
+; CHECK-NEXT: ins.d v0[1], v4[0]
+; CHECK-NEXT: bit.16b v5, v2, v6
+; CHECK-NEXT: bit.16b v1, v3, v6
+; CHECK-NEXT: ins.d v1[1], v5[0]
+; CHECK-NEXT: ret
+ %tmp0 = fpext <4 x float> %b to <4 x double>
+ %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
+ ret <4 x double> %r
+}
+
+; SplitVecRes same
+define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK: ; BB#0:
+; CHECK-NEXT: mov d4, v2[1]
+; CHECK-NEXT: mov d5, v0[1]
+; CHECK-NEXT: movi.2d v6, #0000000000000000
+; CHECK-NEXT: fneg.2d v6, v6
+; CHECK-NEXT: bit.16b v5, v4, v6
+; CHECK-NEXT: mov d4, v3[1]
+; CHECK-NEXT: bit.16b v0, v2, v6
+; CHECK-NEXT: mov d2, v1[1]
+; CHECK-NEXT: bit.16b v2, v4, v6
+; CHECK-NEXT: bit.16b v1, v3, v6
+; CHECK-NEXT: ins.d v0[1], v5[0]
+; CHECK-NEXT: ins.d v1[1], v2[0]
+; CHECK-NEXT: ret
+ %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %r
+}
+
+declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list