[llvm] r360435 - [X86][SSE] Add getHopForBuildVector vector splitting
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 08:46:04 PDT 2019
Author: rksimon
Date: Fri May 10 08:46:04 2019
New Revision: 360435
URL: http://llvm.org/viewvc/llvm-project?rev=360435&view=rev
Log:
[X86][SSE] Add getHopForBuildVector vector splitting
If we only use the lower xmm of a ymm hop, then extract the lower xmm of each operand (for free), perform the hop at xmm width, and then insert the result back into a ymm (for free).
Fixes some of the regressions noted in D61782.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/haddsub-undef.ll
llvm/trunk/test/CodeGen/X86/phaddsub-undef.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=360435&r1=360434&r2=360435&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri May 10 08:46:04 2019
@@ -8581,6 +8581,22 @@ static SDValue getHopForBuildVector(cons
else if (V1.getValueSizeInBits() < Width)
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (BV->getOperand(i).isUndef())
+ DemandedElts.clearBit(i);
+
+ // If we don't need the upper xmm, then perform as a xmm hop.
+ unsigned HalfNumElts = NumElts / 2;
+ if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts);
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
+ SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
+ return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
+ }
+
return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
}
Modified: llvm/trunk/test/CodeGen/X86/haddsub-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-undef.ll?rev=360435&r1=360434&r2=360435&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-undef.ll Fri May 10 08:46:04 2019
@@ -247,7 +247,7 @@ define <8 x float> @test10_undef(<8 x fl
;
; AVX-LABEL: test10_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
@@ -300,7 +300,7 @@ define <8 x float> @test12_undef(<8 x fl
;
; AVX-LABEL: test12_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
Modified: llvm/trunk/test/CodeGen/X86/phaddsub-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/phaddsub-undef.ll?rev=360435&r1=360434&r2=360435&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/phaddsub-undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/phaddsub-undef.ll Fri May 10 08:46:04 2019
@@ -16,20 +16,10 @@ define <8 x i32> @test14_undef(<8 x i32>
; SSE-NEXT: phaddd %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test14_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test14_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test14_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test14_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <8 x i32> %a, i32 0
%vecext1 = extractelement <8 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -149,20 +139,10 @@ define <8 x i32> @test16_undef(<8 x i32>
; SSE-NEXT: phaddd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test16_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test16_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test16_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test16_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <8 x i32> %a, i32 0
%vecext1 = extractelement <8 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -180,20 +160,10 @@ define <16 x i32> @test16_v16i32_undef(<
; SSE-NEXT: phaddd %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: test16_v16i32_undef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test16_v16i32_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: test16_v16i32_undef:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: test16_v16i32_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%vecext = extractelement <16 x i32> %a, i32 0
%vecext1 = extractelement <16 x i32> %a, i32 1
%add = add i32 %vecext, %vecext1
@@ -268,7 +238,7 @@ define <16 x i32> @test17_v16i32_undef(<
; AVX512-LABEL: test17_v16i32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%vecext = extractelement <16 x i32> %a, i32 0
%vecext1 = extractelement <16 x i32> %a, i32 1
More information about the llvm-commits mailing list