[llvm] r230884 - avoid infinite looping when folding vector multiplies of constants (PR22698)
Sanjay Patel
spatel at rotateright.com
Sat Feb 28 16:09:36 PST 2015
Author: spatel
Date: Sat Feb 28 18:09:35 2015
New Revision: 230884
URL: http://llvm.org/viewvc/llvm-project?rev=230884&view=rev
Log:
avoid infinite looping when folding vector multiplies of constants (PR22698)
We were missing a check for the following fold in DAGCombiner:
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
If 'x' is also a constant, then we shouldn't do anything. Otherwise, we could end up swapping the operands back and forth forever.
This should fix:
http://llvm.org/bugs/show_bug.cgi?id=22698
Differential Revision: http://reviews.llvm.org/D7917
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/fmul-combines.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=230884&r1=230883&r2=230884&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Feb 28 18:09:35 2015
@@ -7453,14 +7453,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDLoc SL(N);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts);
+ }
}
}
Modified: llvm/trunk/test/CodeGen/X86/fmul-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fmul-combines.ll?rev=230884&r1=230883&r2=230884&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fmul-combines.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fmul-combines.ll Sat Feb 28 18:09:35 2015
@@ -103,6 +103,40 @@ define <4 x float> @fmul_v4f32_two_const
ret <4 x float> %a
}
+; PR22698 - http://llvm.org/bugs/show_bug.cgi?id=22698
+; Make sure that we don't infinite loop swapping constants back and forth.
+
+define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
+ %mul1 = fmul fast <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <float 3.0, float 3.0, float 3.0, float 3.0>
+ %mul2 = fmul fast <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, %mul1
+ %mul3 = fmul fast <4 x float> %a, %mul2
+ ret <4 x float> %mul3
+
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK-LABEL: PR22698_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
+; Same as above, but verify that non-splat vectors are handled correctly too.
+define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
+ %mul1 = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %mul2 = fmul fast <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, %mul1
+ %mul3 = fmul fast <4 x float> %a, %mul2
+ ret <4 x float> %mul3
+
+; CHECK: float 4.500000e+01
+; CHECK: float 1.200000e+02
+; CHECK: float 2.310000e+02
+; CHECK: float 3.840000e+02
+; CHECK-LABEL: PR22698_no_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
; CHECK-LABEL: fmul_c2_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
More information about the llvm-commits
mailing list