[llvm] r287676 - [X86][SSE] Combine UNPCKL(FHADD, FHADD) -> FHADD for v2f64 shuffles.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 09:50:07 PST 2016
Author: rksimon
Date: Tue Nov 22 11:50:06 2016
New Revision: 287676
URL: http://llvm.org/viewvc/llvm-project?rev=287676&view=rev
Log:
[X86][SSE] Combine UNPCKL(FHADD,FHADD) -> FHADD for v2f64 shuffles.
This occurs during UINT_TO_FP v2f64 lowering.
We can easily generalize this to other horizontal ops (FHSUB, PACKSS, PACKUS) as required; we are already doing something similar with PACKUS in lowerV2I64VectorShuffle.
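
As a rough illustration of that generalization (a sketch only, not part of this commit and untested): because the fold keeps only the low element of each input, the same check should hold for X86ISD::FHSUB as well, reusing the Op0/Op1/Opcode0/Opcode1 locals introduced in the patch below:

  // Sketch only: widen the fold to accept FHSUB in addition to FHADD.
  if (VT == MVT::v2f64 && Opcode0 == Opcode1 &&
      (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::FHSUB))
    return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0));

The PACKSS/PACKUS cases would presumably need the integer types and unpack opcodes handled separately, along the lines of what lowerV2I64VectorShuffle already does for PACKUS.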
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=287676&r1=287675&r2=287676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 22 11:50:06 2016
@@ -26601,6 +26601,17 @@ static SDValue combineTargetShuffle(SDVa
assert(Mask.size() == 4);
break;
case X86ISD::UNPCKL: {
+ auto Op0 = N.getOperand(0);
+ auto Op1 = N.getOperand(1);
+ unsigned Opcode0 = Op0.getOpcode();
+ unsigned Opcode1 = Op1.getOpcode();
+
+ // Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single
+ // X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization.
+ // TODO: Add other horizontal operations as required.
+ if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD)
+ return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0));
+
// Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
// which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
// moves upper half elements into the lower half part. For example:
@@ -26618,9 +26629,7 @@ static SDValue combineTargetShuffle(SDVa
if (!VT.is128BitVector())
return SDValue();
- auto Op0 = N.getOperand(0);
- auto Op1 = N.getOperand(1);
- if (Op0.isUndef() && Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) {
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
unsigned NumElts = VT.getVectorNumElements();
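
For anyone wanting to convince themselves the fold is sound, here is a small scalar model (plain illustrative C++, not from the tree) of the v2f64 identity the new code relies on: haddpd's low element depends only on its first operand, so an unpcklpd of two haddpd results is itself a haddpd of their first operands.

  #include <array>
  #include <cassert>

  using V2 = std::array<double, 2>;              // model of a <2 x double>

  static V2 hadd(V2 a, V2 b) { return {a[0] + a[1], b[0] + b[1]}; }  // haddpd
  static V2 unpckl(V2 a, V2 b) { return {a[0], b[0]}; }              // unpcklpd

  int main() {
    V2 x{1.0, 2.0}, y{3.0, 4.0}, z{5.0, 6.0}, w{7.0, 8.0};
    // UNPCKL(FHADD(x, y), FHADD(z, w)) == FHADD(x, z) for any inputs,
    // which is exactly the node the combine above produces.
    assert(unpckl(hadd(x, y), hadd(z, w)) == hadd(x, z));
    return 0;
  }
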
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=287676&r1=287675&r2=287676&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Tue Nov 22 11:50:06 2016
@@ -425,12 +425,10 @@ define <2 x double> @uitofp_2i64_to_2f64
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; VEX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
;
; AVX512-LABEL: uitofp_2i64_to_2f64:
@@ -471,12 +469,10 @@ define <2 x double> @uitofp_2i32_to_2f64
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; VEX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f64:
@@ -699,20 +695,16 @@ define <4 x double> @uitofp_4i64_to_4f64
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -723,20 +715,16 @@ define <4 x double> @uitofp_4i64_to_4f64
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -2456,12 +2444,10 @@ define <2 x double> @uitofp_load_2i64_to
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; VEX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_load_2i64_to_2f64:
@@ -2515,12 +2501,10 @@ define <2 x double> @uitofp_load_2i32_to
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; VEX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
@@ -2652,20 +2636,16 @@ define <4 x double> @uitofp_load_4i64_to
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -2677,20 +2657,16 @@ define <4 x double> @uitofp_load_4i64_to
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;