[llvm] 172456c - [Legalizer] Fix some flags miss in vector results
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 07:07:01 PDT 2020
Author: Qiu Chaofan
Date: 2020-03-26T22:01:19+08:00
New Revision: 172456c77506abf8f5a1a7e28db6ea449904ec8e
URL: https://github.com/llvm/llvm-project/commit/172456c77506abf8f5a1a7e28db6ea449904ec8e
DIFF: https://github.com/llvm/llvm-project/commit/172456c77506abf8f5a1a7e28db6ea449904ec8e.diff
LOG: [Legalizer] Fix some flags miss in vector results
In some scalarize/split result methods (unary, binary, ...), the flags on
the SDNode were not passed down to the new nodes, which may lead to
unexpected results in unsafe floating-point optimizations. This patch
fixes them (though the fix may not be complete).
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D76832
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index afed415af5ed..3f0d22384618 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -191,8 +191,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = GetScalarizedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- Op0.getValueType(), Op0, Op1, Op2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
@@ -200,7 +200,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
- Op2);
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
@@ -225,7 +225,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
Opers[i] = Oper;
}
- SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
+ Opers, N->getFlags());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -255,6 +256,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
ResVT.getVectorElementType(), OvVT.getVectorElementType());
SDNode *ScalarNode = DAG.getNode(
N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+ ScalarNode->setFlags(N->getFlags());
// Replace the other vector result not being explicitly scalarized here.
unsigned OtherNo = 1 - ResNo;
@@ -364,7 +366,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
DAG.getVectorIdxConstant(0, DL));
}
- return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
@@ -997,10 +999,10 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
- Op0Lo, Op1Lo, Op2Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
- Op0Hi, Op1Hi, Op2Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo,
+ Op2Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi,
+ Op2Hi, N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
@@ -1012,8 +1014,10 @@ void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Op2 = N->getOperand(2);
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
+ N->getFlags());
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
+ N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
@@ -1294,8 +1298,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
EVT LoValueVTs[] = {LoVT, MVT::Other};
EVT HiValueVTs[] = {HiVT, MVT::Other};
- Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+ Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
+ N->getFlags());
// Build a factor node to remember that this Op is independent of the
// other one.
@@ -1385,6 +1391,8 @@ void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+ LoNode->setFlags(N->getFlags());
+ HiNode->setFlags(N->getFlags());
Lo = SDValue(LoNode, ResNo);
Hi = SDValue(HiNode, ResNo);
@@ -1710,11 +1718,13 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1),
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1),
+ N->getFlags());
} else {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags());
}
}
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
index 0b6fb97ef913..4c8e50468fd7 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -87,8 +87,33 @@ define <4 x float> @v4f32_no_daz(<4 x float> %f) #0 {
define <8 x float> @v8f32_no_daz(<8 x float> %f) #0 {
; NHM-LABEL: v8f32_no_daz:
; NHM: # %bb.0:
-; NHM-NEXT: sqrtps %xmm0, %xmm0
-; NHM-NEXT: sqrtps %xmm1, %xmm1
+; NHM-NEXT: movaps %xmm0, %xmm2
+; NHM-NEXT: rsqrtps %xmm0, %xmm3
+; NHM-NEXT: mulps %xmm3, %xmm0
+; NHM-NEXT: movaps {{.*#+}} xmm4 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; NHM-NEXT: movaps %xmm0, %xmm5
+; NHM-NEXT: mulps %xmm4, %xmm5
+; NHM-NEXT: mulps %xmm3, %xmm0
+; NHM-NEXT: movaps {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; NHM-NEXT: addps %xmm3, %xmm0
+; NHM-NEXT: mulps %xmm5, %xmm0
+; NHM-NEXT: movaps {{.*#+}} xmm5 = [NaN,NaN,NaN,NaN]
+; NHM-NEXT: andps %xmm5, %xmm2
+; NHM-NEXT: movaps {{.*#+}} xmm6 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; NHM-NEXT: movaps %xmm6, %xmm7
+; NHM-NEXT: cmpleps %xmm2, %xmm7
+; NHM-NEXT: andps %xmm7, %xmm0
+; NHM-NEXT: rsqrtps %xmm1, %xmm7
+; NHM-NEXT: movaps %xmm1, %xmm2
+; NHM-NEXT: mulps %xmm7, %xmm2
+; NHM-NEXT: mulps %xmm2, %xmm4
+; NHM-NEXT: mulps %xmm7, %xmm2
+; NHM-NEXT: addps %xmm3, %xmm2
+; NHM-NEXT: mulps %xmm4, %xmm2
+; NHM-NEXT: andps %xmm5, %xmm1
+; NHM-NEXT: cmpleps %xmm1, %xmm6
+; NHM-NEXT: andps %xmm6, %xmm2
+; NHM-NEXT: movaps %xmm2, %xmm1
; NHM-NEXT: retq
;
; SNB-LABEL: v8f32_no_daz:
@@ -209,8 +234,28 @@ define <4 x float> @v4f32_daz(<4 x float> %f) #1 {
define <8 x float> @v8f32_daz(<8 x float> %f) #1 {
; NHM-LABEL: v8f32_daz:
; NHM: # %bb.0:
-; NHM-NEXT: sqrtps %xmm0, %xmm0
-; NHM-NEXT: sqrtps %xmm1, %xmm1
+; NHM-NEXT: rsqrtps %xmm0, %xmm2
+; NHM-NEXT: movaps %xmm0, %xmm3
+; NHM-NEXT: mulps %xmm2, %xmm3
+; NHM-NEXT: movaps {{.*#+}} xmm4 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; NHM-NEXT: movaps %xmm3, %xmm5
+; NHM-NEXT: mulps %xmm4, %xmm5
+; NHM-NEXT: mulps %xmm2, %xmm3
+; NHM-NEXT: movaps {{.*#+}} xmm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; NHM-NEXT: addps %xmm2, %xmm3
+; NHM-NEXT: mulps %xmm5, %xmm3
+; NHM-NEXT: xorps %xmm5, %xmm5
+; NHM-NEXT: cmpneqps %xmm5, %xmm0
+; NHM-NEXT: andps %xmm3, %xmm0
+; NHM-NEXT: rsqrtps %xmm1, %xmm3
+; NHM-NEXT: movaps %xmm1, %xmm6
+; NHM-NEXT: mulps %xmm3, %xmm6
+; NHM-NEXT: mulps %xmm6, %xmm4
+; NHM-NEXT: mulps %xmm3, %xmm6
+; NHM-NEXT: addps %xmm2, %xmm6
+; NHM-NEXT: mulps %xmm4, %xmm6
+; NHM-NEXT: cmpneqps %xmm5, %xmm1
+; NHM-NEXT: andps %xmm6, %xmm1
; NHM-NEXT: retq
;
; SNB-LABEL: v8f32_daz:
More information about the llvm-commits
mailing list