[llvm] r249251 - [DAGCombiner] Generalize FADD constant combines to work with vectors
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 3 15:06:07 PDT 2015
Author: rksimon
Date: Sat Oct 3 17:06:06 2015
New Revision: 249251
URL: http://llvm.org/viewvc/llvm-project?rev=249251&view=rev
Log:
[DAGCombiner] Generalize FADD constant combines to work with vectors
Updated the FADD combines to work with vectors as well as scalars.
Differential Revision: http://reviews.llvm.org/D13416
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/fadd-combines.ll
llvm/trunk/test/CodeGen/X86/fmul-combines.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=249251&r1=249250&r2=249251&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Oct 3 17:06:06 2015
@@ -7987,8 +7987,8 @@ SDValue DAGCombiner::visitFMULForFMAComb
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -8026,12 +8026,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
Flags),
@@ -8050,12 +8051,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
@@ -8064,19 +8065,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
@@ -8085,16 +8086,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
N1, DAG.getConstantFP(3.0, DL, VT), Flags);
@@ -8102,7 +8103,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
Modified: llvm/trunk/test/CodeGen/X86/fadd-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fadd-combines.ll?rev=249251&r1=249250&r2=249251&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fadd-combines.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fadd-combines.ll Sat Oct 3 17:06:06 2015
@@ -11,13 +11,12 @@ define float @fadd_zero_f32(float %x) #0
define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_zero_4f32:
; CHECK: # BB#0:
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, zeroinitializer
ret <4 x float> %y
}
+; CHECK: float 3
define float @fadd_2const_f32(float %x) #0 {
; CHECK-LABEL: fadd_2const_f32:
; CHECK: # BB#0:
@@ -28,17 +27,21 @@ define float @fadd_2const_f32(float %x)
ret float %z
}
+; CHECK: float 5.000000e+00
+; CHECK: float 5.000000e+00
+; CHECK: float 5.000000e+00
+; CHECK: float 5.000000e+00
define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_2const_4f32:
; CHECK: # BB#0:
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
ret <4 x float> %z
}
+; CHECK: float 3
define float @fadd_x_fmul_x_c_f32(float %x) #0 {
; CHECK-LABEL: fadd_x_fmul_x_c_f32:
; CHECK: # BB#0:
@@ -49,18 +52,21 @@ define float @fadd_x_fmul_x_c_f32(float
ret float %z
}
+; CHECK: float 2.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 5.000000e+00
define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_x_fmul_x_c_4f32:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
-; CHECK-NEXT: mulps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %x, %y
ret <4 x float> %z
}
+; CHECK: float 3
define float @fadd_fmul_x_c_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_x_f32:
; CHECK: # BB#0:
@@ -71,18 +77,21 @@ define float @fadd_fmul_x_c_x_f32(float
ret float %z
}
+; CHECK: float 2.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 5.000000e+00
define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_x_4f32:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
-; CHECK-NEXT: mulps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
%z = fadd <4 x float> %y, %x
ret <4 x float> %z
}
+; CHECK: float 4
define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32:
; CHECK: # BB#0:
@@ -94,13 +103,14 @@ define float @fadd_fadd_x_x_fmul_x_c_f32
ret float %w
}
+; CHECK: float 3.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 5.000000e+00
+; CHECK: float 6.000000e+00
define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
-; CHECK-NEXT: mulps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, %x
%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
@@ -108,6 +118,7 @@ define <4 x float> @fadd_fadd_x_x_fmul_x
ret <4 x float> %w
}
+; CHECK: float 4
define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -119,13 +130,14 @@ define float @fadd_fmul_x_c_fadd_x_x_f32
ret float %w
}
+; CHECK: float 3.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 5.000000e+00
+; CHECK: float 6.000000e+00
define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
-; CHECK-NEXT: mulps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm0, %xmm1
-; CHECK-NEXT: addps %xmm1, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%y = fadd <4 x float> %x, %x
%z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
@@ -133,6 +145,7 @@ define <4 x float> @fadd_fmul_x_c_fadd_x
ret <4 x float> %w
}
+; CHECK: float 3
define float @fadd_x_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_x_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -143,6 +156,10 @@ define float @fadd_x_fadd_x_x_f32(float
ret float %z
}
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_x_fadd_x_x_4f32:
; CHECK: # BB#0:
@@ -153,6 +170,7 @@ define <4 x float> @fadd_x_fadd_x_x_4f32
ret <4 x float> %z
}
+; CHECK: float 3
define float @fadd_fadd_x_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_x_f32:
; CHECK: # BB#0:
@@ -163,6 +181,10 @@ define float @fadd_fadd_x_x_x_f32(float
ret float %z
}
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
+; CHECK: float 3.000000e+00
define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_x_4f32:
; CHECK: # BB#0:
@@ -173,6 +195,7 @@ define <4 x float> @fadd_fadd_x_x_x_4f32
ret <4 x float> %z
}
+; CHECK: float 4
define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32:
; CHECK: # BB#0:
@@ -183,6 +206,10 @@ define float @fadd_fadd_x_x_fadd_x_x_f32
ret float %z
}
+; CHECK: float 4.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 4.000000e+00
+; CHECK: float 4.000000e+00
define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32:
; CHECK: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/fmul-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fmul-combines.ll?rev=249251&r1=249250&r2=249251&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fmul-combines.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fmul-combines.ll Sat Oct 3 17:06:06 2015
@@ -86,15 +86,13 @@ define <4 x float> @fmul_v4f32_two_const
}
; More than one use of a constant multiply should not inhibit the optimization.
-; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
-; CHECK: float 5.000000e+00
-; CHECK: float 1.200000e+01
-; CHECK: float 2.100000e+01
-; CHECK: float 3.200000e+01
+; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
+; CHECK: float 6.000000e+00
+; CHECK: float 1.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 3.600000e+01
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
; CHECK: mulps
-; CHECK: mulps
-; CHECK: addps
; CHECK: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
More information about the llvm-commits
mailing list