[llvm-branch-commits] [llvm] 4d16d8d - [DAGCombine] Remove dead node when it is created by getNegatedExpression
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 25 08:54:34 PDT 2020
Author: QingShan Zhang
Date: 2020-08-25T17:51:05+02:00
New Revision: 4d16d8dfe50eb45545e844c3c9acafd363637dad
URL: https://github.com/llvm/llvm-project/commit/4d16d8dfe50eb45545e844c3c9acafd363637dad
DIFF: https://github.com/llvm/llvm-project/commit/4d16d8dfe50eb45545e844c3c9acafd363637dad.diff
LOG: [DAGCombine] Remove dead node when it is created by getNegatedExpression
We hit the compile-time problem reported in https://bugs.llvm.org/show_bug.cgi?id=46877,
and the cause is the same as in D77319. So we need to remove the dead nodes we create
to avoid increasing the problem size for the DAGCombiner (a minimal sketch of the
clean-up pattern follows below).
Reviewed By: Spatel
Differential Revision: https://reviews.llvm.org/D86183
(cherry picked from commit 960cbc53ca170c8c605bf83fa63b49ab27a56f65)
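To make the compile-time reasoning concrete, here is a minimal, self-contained sketch of the clean-up pattern, written against a toy Node/Graph pair instead of LLVM's SDNode/SelectionDAG; the names Graph, Node, removeDeadNode and pickCheaperNegation are hypothetical stand-ins, not LLVM APIs. The idea is the same as in the diff below: negations are built speculatively for both operands, the cheaper one wins, and the losing candidate is deleted immediately while it still has no users, so later combines never have to visit it.

#include <cassert>
#include <memory>
#include <vector>

// Toy stand-in for SDNode: it only tracks how many users reference it.
struct Node {
  int UseCount = 0;
  bool useEmpty() const { return UseCount == 0; }
};

// Toy stand-in for SelectionDAG: it owns the nodes it creates.
struct Graph {
  std::vector<std::unique_ptr<Node>> Nodes;

  Node *createNode() {
    Nodes.push_back(std::make_unique<Node>());
    return Nodes.back().get();
  }

  // Mirror of DAG.RemoveDeadNode: erase a node that has no users left.
  void removeDeadNode(Node *N) {
    assert(N->useEmpty() && "node still has users");
    for (auto It = Nodes.begin(), End = Nodes.end(); It != End; ++It) {
      if (It->get() == N) {
        Nodes.erase(It);
        return;
      }
    }
  }
};

// Same shape as the FSUB/FMUL/FMA cases in getNegatedExpression: build both
// candidate negations, keep the cheaper one, and drop the unused one so the
// graph does not keep growing across repeated combines.
Node *pickCheaperNegation(Graph &G, int CostX, int CostY) {
  Node *NegX = G.createNode();
  Node *NegY = G.createNode();

  auto RemoveDeadNode = [&](Node *N) {
    if (N && N->useEmpty())
      G.removeDeadNode(N);
  };

  if (CostX <= CostY) {
    RemoveDeadNode(NegY); // NegY lost and has no users; delete it now.
    return NegX;
  }
  RemoveDeadNode(NegX); // NegX lost and has no users; delete it now.
  return NegY;
}

int main() {
  Graph G;
  Node *Chosen = pickCheaperNegation(G, /*CostX=*/1, /*CostY=*/2);
  (void)Chosen;
  assert(G.Nodes.size() == 1 && "only the chosen candidate remains");
  return 0;
}

Without the clean-up, every failed speculation leaves an orphan node behind, and repeated combines over the same expression tree make the node count (and thus DAGCombiner's work) grow far faster than the input, which is what the test below guards against.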
Added:
llvm/test/CodeGen/X86/pr46877.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 96df20039b15..94cb6da3d69e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5726,6 +5726,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+ auto RemoveDeadNode = [&](SDValue N) {
+ if (N && N.getNode()->use_empty())
+ DAG.RemoveDeadNode(N.getNode());
+ };
+
SDLoc DL(Op);
switch (Opcode) {
@@ -5804,12 +5809,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
+ RemoveDeadNode(NegY);
return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
+ RemoveDeadNode(NegX);
return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
}
break;
@@ -5847,6 +5854,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
+ RemoveDeadNode(NegY);
return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
}
@@ -5858,6 +5866,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
+ RemoveDeadNode(NegX);
return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
}
break;
@@ -5887,12 +5896,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);
+ RemoveDeadNode(NegY);
return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = std::min(CostY, CostZ);
+ RemoveDeadNode(NegX);
return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
}
break;
diff --git a/llvm/test/CodeGen/X86/pr46877.ll b/llvm/test/CodeGen/X86/pr46877.ll
new file mode 100644
index 000000000000..581b2d586fa0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr46877.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -mcpu=haswell -mtriple=x86_64 | FileCheck %s
+
+; Verify that we are not exponentially increasing compile time.
+define void @tester(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20, float %21, float %22, float %23, float %24, float %25, float %26, float %27, float %28, float %29, float %30, float %31, float %32, float %33, float %34, float %35, float %36, float %37, float %38, float %39, float %40, float %41, float %42, float %43, float %44, float %45, float %46, float %47, float %48, float %49, float %50, float %51, float %52, float %53, float %54, float %55, float %56, float %57, float %58, float %59, float %60, float %61, float %62, float %63, float %64, float %65, float %66, float %67, float %68, float %69, float %70, float %71, float %72, float %73, float %74, float %75, float %76, float %77, float %78, float %79, float* %80) {
+; CHECK-LABEL: tester:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovaps %xmm3, %xmm15
+; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
+; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm12
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm3 = (xmm15 * xmm3) - xmm0
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm3
+; CHECK-NEXT: vmulss %xmm6, %xmm12, %xmm2
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm7 * xmm2) + xmm0
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm5
+; CHECK-NEXT: vmulss %xmm0, %xmm13, %xmm2
+; CHECK-NEXT: vmovss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmulss %xmm2, %xmm10, %xmm2
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm3
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm3 = -(xmm3 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm3
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm4
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm14 * xmm4) + xmm0
+; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm4
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm5 = -(xmm5 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm5
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm10 * xmm5) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss %xmm0, %xmm9, %xmm6
+; CHECK-NEXT: vmovss %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmulss %xmm6, %xmm14, %xmm5
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm13 * xmm5) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm11
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm11 * xmm3) + xmm0
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm15 * xmm3) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm4
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm8, %xmm6
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4
+; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm4
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm1 * xmm4) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4
+; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm1
+; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm1 * xmm4) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm10
+; CHECK-NEXT: vmulss %xmm0, %xmm12, %xmm6
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm4
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm13, %xmm5
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm7 * xmm5) + xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm5
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm5
+; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm12
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm7 * xmm5) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss %xmm6, %xmm3, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm10 * xmm2) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm9
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm1
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm1 = -(xmm1 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm5, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm5
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm5 = -(xmm5 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm2
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm13 * xmm2) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm4
+; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm3
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm13 * xmm3) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm2
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0
+; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 4-byte Reload
+; CHECK-NEXT: # xmm12 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm7 = -(xmm7 * mem) + xmm0
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm13 * xmm2) + xmm0
+; CHECK-NEXT: vmulss %xmm7, %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm8 = -(xmm8 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm8, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm0, %xmm5, %xmm2
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm10 * xmm2) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm5 * xmm3) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm8
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm4
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm10
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm11 = -(xmm5 * xmm11) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm2
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm4
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm11, %xmm2
+; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vfnmadd132ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm14 # 4-byte Folded Reload
+; CHECK-NEXT: # xmm14 = -(xmm14 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm14, %xmm9
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm11
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm11 = -(xmm11 * mem) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm7
+; CHECK-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 4-byte Folded Reload
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm1
+; CHECK-NEXT: vmulss %xmm6, %xmm15, %xmm6
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm6 = -(xmm3 * xmm6) + xmm0
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm4
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm3 * xmm4) + xmm0
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm7 = -(xmm3 * xmm7) + xmm0
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm3 * xmm5) + xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2
+; CHECK-NEXT: vmulss %xmm0, %xmm13, %xmm3
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm3
+; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm12 * xmm3) + xmm0
+; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm12 * xmm2) + xmm0
+; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm15 * xmm1) - xmm0
+; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm8 = -(xmm8 * mem) + xmm0
+; CHECK-NEXT: vmulss %xmm8, %xmm9, %xmm0
+; CHECK-NEXT: vmulss %xmm6, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm7, %xmm0, %xmm0
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm4
+; CHECK-NEXT: vmulss %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vmulss %xmm5, %xmm11, %xmm4
+; CHECK-NEXT: vmulss %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovss %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %81 = fsub reassoc nsz contract float %0, %1
+ %82 = fmul reassoc nsz contract float %1, %2
+ %83 = fmul reassoc nsz contract float %3, %82
+ %84 = fsub reassoc nsz contract float %0, %83
+ %85 = fmul reassoc nsz contract float %84, %4
+ %86 = fmul reassoc nsz contract float %81, %5
+ %87 = fsub reassoc nsz contract float %0, %86
+ %88 = fmul reassoc nsz contract float %87, %85
+ %89 = fmul reassoc nsz contract float %81, %6
+ %90 = fmul reassoc nsz contract float %89, %7
+ %91 = fsub reassoc nsz contract float %0, %90
+ %92 = fmul reassoc nsz contract float %91, %88
+ %93 = fmul reassoc nsz contract float %8, %0
+ %94 = fmul reassoc nsz contract float %93, %9
+ %95 = fmul reassoc nsz contract float %94, %10
+ %96 = fsub reassoc nsz contract float %0, %95
+ %97 = fmul reassoc nsz contract float %96, %92
+ %98 = fmul reassoc nsz contract float %11, %7
+ %99 = fmul reassoc nsz contract float %98, %12
+ %100 = fsub reassoc nsz contract float %0, %99
+ %101 = fmul reassoc nsz contract float %100, %97
+ %102 = fmul reassoc nsz contract float %13, %0
+ %103 = fmul reassoc nsz contract float %102, %14
+ %104 = fmul reassoc nsz contract float %103, %15
+ %105 = fsub reassoc nsz contract float %0, %104
+ %106 = fmul reassoc nsz contract float %105, %101
+ %107 = fmul reassoc nsz contract float %16, %17
+ %108 = fsub reassoc nsz contract float %0, %107
+ %109 = fmul reassoc nsz contract float %108, %106
+ %110 = fmul reassoc nsz contract float %18, %19
+ %111 = fmul reassoc nsz contract float %110, %9
+ %112 = fsub reassoc nsz contract float %0, %111
+ %113 = fmul reassoc nsz contract float %112, %109
+ %114 = fmul reassoc nsz contract float %20, %0
+ %115 = fmul reassoc nsz contract float %114, %15
+ %116 = fmul reassoc nsz contract float %81, %115
+ %117 = fsub reassoc nsz contract float %0, %116
+ %118 = fmul reassoc nsz contract float %117, %113
+ %119 = fmul reassoc nsz contract float %8, %21
+ %120 = fsub reassoc nsz contract float %0, %119
+ %121 = fmul reassoc nsz contract float %120, %118
+ %122 = fmul reassoc nsz contract float %102, %22
+ %123 = fmul reassoc nsz contract float %122, %23
+ %124 = fsub reassoc nsz contract float %0, %123
+ %125 = fmul reassoc nsz contract float %124, %121
+ %126 = fmul reassoc nsz contract float %125, %24
+ %127 = fmul reassoc nsz contract float %3, %25
+ %128 = fsub reassoc nsz contract float %0, %127
+ %129 = fmul reassoc nsz contract float %128, %126
+ %130 = fmul reassoc nsz contract float %129, %26
+ %131 = fmul reassoc nsz contract float %27, %1
+ %132 = fmul reassoc nsz contract float %131, %28
+ %133 = fsub reassoc nsz contract float %0, %132
+ %134 = fmul reassoc nsz contract float %133, %130
+ %135 = fmul reassoc nsz contract float %29, %30
+ %136 = fmul reassoc nsz contract float %135, %31
+ %137 = fsub reassoc nsz contract float %0, %136
+ %138 = fmul reassoc nsz contract float %137, %134
+ %139 = fmul reassoc nsz contract float %138, %32
+ %140 = fmul reassoc nsz contract float %139, %33
+ %141 = fmul reassoc nsz contract float %140, %34
+ %142 = fmul reassoc nsz contract float %35, %9
+ %143 = fmul reassoc nsz contract float %142, %36
+ %144 = fsub reassoc nsz contract float %0, %143
+ %145 = fmul reassoc nsz contract float %144, %141
+ %146 = fmul reassoc nsz contract float %145, %37
+ %147 = fmul reassoc nsz contract float %1, %38
+ %148 = fsub reassoc nsz contract float %0, %147
+ %149 = fmul reassoc nsz contract float %148, %146
+ %150 = fmul reassoc nsz contract float %39, %40
+ %151 = fsub reassoc nsz contract float %0, %150
+ %152 = fmul reassoc nsz contract float %151, %149
+ %153 = fmul reassoc nsz contract float %152, %41
+ %154 = fmul reassoc nsz contract float %20, %42
+ %155 = fmul reassoc nsz contract float %154, %43
+ %156 = fsub reassoc nsz contract float %0, %155
+ %157 = fmul reassoc nsz contract float %156, %153
+ %158 = fmul reassoc nsz contract float %157, %44
+ %159 = fmul reassoc nsz contract float %158, %45
+ %160 = fmul reassoc nsz contract float %81, %0
+ %161 = fmul reassoc nsz contract float %160, %46
+ %162 = fmul reassoc nsz contract float %161, %14
+ %163 = fsub reassoc nsz contract float %0, %162
+ %164 = fmul reassoc nsz contract float %163, %159
+ %165 = fmul reassoc nsz contract float %8, %47
+ %166 = fmul reassoc nsz contract float %18, %165
+ %167 = fsub reassoc nsz contract float %0, %166
+ %168 = fmul reassoc nsz contract float %167, %164
+ %169 = fmul reassoc nsz contract float %168, %48
+ %170 = fmul reassoc nsz contract float %169, %49
+ %171 = fmul reassoc nsz contract float %18, %50
+ %172 = fsub reassoc nsz contract float %0, %171
+ %173 = fmul reassoc nsz contract float %172, %170
+ %174 = fmul reassoc nsz contract float %16, %160
+ %175 = fmul reassoc nsz contract float %174, %12
+ %176 = fsub reassoc nsz contract float %0, %175
+ %177 = fmul reassoc nsz contract float %176, %173
+ %178 = fmul reassoc nsz contract float %51, %0
+ %179 = fmul reassoc nsz contract float %178, %22
+ %180 = fmul reassoc nsz contract float %179, %52
+ %181 = fsub reassoc nsz contract float %0, %180
+ %182 = fmul reassoc nsz contract float %181, %177
+ %183 = fmul reassoc nsz contract float %27, %16
+ %184 = fmul reassoc nsz contract float %183, %53
+ %185 = fsub reassoc nsz contract float %0, %184
+ %186 = fmul reassoc nsz contract float %185, %182
+ %187 = fmul reassoc nsz contract float %16, %54
+ %188 = fmul reassoc nsz contract float %8, %187
+ %189 = fsub reassoc nsz contract float %0, %188
+ %190 = fmul reassoc nsz contract float %189, %186
+ %191 = fmul reassoc nsz contract float %190, %55
+ %192 = fmul reassoc nsz contract float %191, %56
+ %193 = fmul reassoc nsz contract float %57, %58
+ %194 = fmul reassoc nsz contract float %193, %59
+ %195 = fsub reassoc nsz contract float %0, %194
+ %196 = fmul reassoc nsz contract float %195, %192
+ %197 = fmul reassoc nsz contract float %13, %160
+ %198 = fmul reassoc nsz contract float %197, %36
+ %199 = fsub reassoc nsz contract float %0, %198
+ %200 = fmul reassoc nsz contract float %199, %196
+ %201 = fmul reassoc nsz contract float %93, %60
+ %202 = fmul reassoc nsz contract float %201, %61
+ %203 = fsub reassoc nsz contract float %0, %202
+ %204 = fmul reassoc nsz contract float %203, %200
+ %205 = fmul reassoc nsz contract float %204, %62
+ %206 = fmul reassoc nsz contract float %205, %63
+ %207 = fmul reassoc nsz contract float %114, %9
+ %208 = fmul reassoc nsz contract float %207, %59
+ %209 = fsub reassoc nsz contract float %0, %208
+ %210 = fmul reassoc nsz contract float %209, %206
+ %211 = fmul reassoc nsz contract float %18, %64
+ %212 = fsub reassoc nsz contract float %0, %211
+ %213 = fmul reassoc nsz contract float %212, %210
+ %214 = fmul reassoc nsz contract float %29, %65
+ %215 = fsub reassoc nsz contract float %0, %214
+ %216 = fmul reassoc nsz contract float %215, %213
+ %217 = fmul reassoc nsz contract float %216, %66
+ %218 = fmul reassoc nsz contract float %3, %67
+ %219 = fsub reassoc nsz contract float %0, %218
+ %220 = fmul reassoc nsz contract float %219, %217
+ %221 = fmul reassoc nsz contract float %220, %68
+ %222 = fmul reassoc nsz contract float %57, %69
+ %223 = fsub reassoc nsz contract float %0, %222
+ %224 = fmul reassoc nsz contract float %223, %221
+ %225 = fmul reassoc nsz contract float %57, %0
+ %226 = fmul reassoc nsz contract float %225, %61
+ %227 = fmul reassoc nsz contract float %226, %12
+ %228 = fsub reassoc nsz contract float %0, %227
+ %229 = fmul reassoc nsz contract float %228, %224
+ %230 = fmul reassoc nsz contract float %178, %70
+ %231 = fmul reassoc nsz contract float %230, %46
+ %232 = fsub reassoc nsz contract float %0, %231
+ %233 = fmul reassoc nsz contract float %232, %229
+ %234 = fmul reassoc nsz contract float %233, %71
+ %235 = fmul reassoc nsz contract float %57, %122
+ %236 = fsub reassoc nsz contract float %0, %235
+ %237 = fmul reassoc nsz contract float %236, %234
+ %238 = fmul reassoc nsz contract float %20, %160
+ %239 = fmul reassoc nsz contract float %3, %238
+ %240 = fsub reassoc nsz contract float %0, %239
+ %241 = fmul reassoc nsz contract float %240, %237
+ %242 = fmul reassoc nsz contract float %16, %72
+ %243 = fmul reassoc nsz contract float %242, %73
+ %244 = fsub reassoc nsz contract float %0, %243
+ %245 = fmul reassoc nsz contract float %244, %241
+ %246 = fmul reassoc nsz contract float %154, %15
+ %247 = fsub reassoc nsz contract float %0, %246
+ %248 = fmul reassoc nsz contract float %247, %245
+ %249 = fmul reassoc nsz contract float %178, %23
+ %250 = fmul reassoc nsz contract float %249, %74
+ %251 = fsub reassoc nsz contract float %0, %250
+ %252 = fmul reassoc nsz contract float %251, %248
+ %253 = fmul reassoc nsz contract float %3, %160
+ %254 = fmul reassoc nsz contract float %51, %253
+ %255 = fsub reassoc nsz contract float %0, %254
+ %256 = fmul reassoc nsz contract float %255, %252
+ %257 = fmul reassoc nsz contract float %13, %75
+ %258 = fmul reassoc nsz contract float %257, %51
+ %259 = fsub reassoc nsz contract float %0, %258
+ %260 = fmul reassoc nsz contract float %259, %256
+ %261 = fmul reassoc nsz contract float %8, %76
+ %262 = fmul reassoc nsz contract float %51, %261
+ %263 = fsub reassoc nsz contract float %0, %262
+ %264 = fmul reassoc nsz contract float %263, %260
+ %265 = fmul reassoc nsz contract float %264, %77
+ %266 = fmul reassoc nsz contract float %39, %0
+ %267 = fmul reassoc nsz contract float %266, %78
+ %268 = fmul reassoc nsz contract float %267, %14
+ %269 = fsub reassoc nsz contract float %0, %268
+ %270 = fmul reassoc nsz contract float %269, %265
+ %271 = fmul reassoc nsz contract float %1, %76
+ %272 = fmul reassoc nsz contract float %51, %271
+ %273 = fsub reassoc nsz contract float %0, %272
+ %274 = fmul reassoc nsz contract float %273, %270
+ %275 = fmul reassoc nsz contract float %0, %59
+ %276 = fmul reassoc nsz contract float %275, %79
+ %277 = fmul reassoc nsz contract float %276, %36
+ %278 = fsub reassoc nsz contract float %0, %277
+ %279 = fmul reassoc nsz contract float %278, %274
+ %280 = fmul reassoc nsz contract float %114, %22
+ %281 = fmul reassoc nsz contract float %280, %36
+ %282 = fsub reassoc nsz contract float %0, %281
+ %283 = fmul reassoc nsz contract float %282, %279
+ %284 = fmul reassoc nsz contract float %0, %43
+ %285 = fmul reassoc nsz contract float %284, %81
+ %286 = fmul reassoc nsz contract float %3, %285
+ %287 = fsub reassoc nsz contract float %0, %286
+ %288 = fmul reassoc nsz contract float %287, %283
+ store float %288, float* %80, align 4
+ ret void
+}