[llvm] r352743 - [X86][AVX] Enable AVX1 broadcasts in shuffle combining
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 31 03:41:10 PST 2019
Author: rksimon
Date: Thu Jan 31 03:41:10 2019
New Revision: 352743
URL: http://llvm.org/viewvc/llvm-project?rev=352743&view=rev
Log:
[X86][AVX] Enable AVX1 broadcasts in shuffle combining
Enables 32/64-bit scalar load broadcasts on AVX1 targets.
The extractelement-load.ll regression will be fixed shortly in a follow-up commit.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/extractelement-load.ll
llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
llvm/trunk/test/CodeGen/X86/insert-loaded-scalar.ll
llvm/trunk/test/CodeGen/X86/insertelement-var-index.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 31 03:41:10 2019
@@ -31035,15 +31035,27 @@ static SDValue combineX86ShuffleChain(Ar
}
// Attempt to match against broadcast-from-vector.
- // TODO: Add (partial) AVX1 support.
- if (Subtarget.hasAVX2() && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
+ // Limit AVX1 to cases where we're loading+broadcasting a scalar element.
+ if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits))
+ && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
- return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(MaskVT, V1);
- Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
- return DAG.getBitcast(RootVT, Res);
+ if (V1.getValueType() == MaskVT &&
+ V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ MayFoldLoad(V1.getOperand(0))) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ return SDValue(); // Nothing to do!
+ Res = V1.getOperand(0);
+ Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
+ if (Subtarget.hasAVX2()) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ return SDValue(); // Nothing to do!
+ Res = DAG.getBitcast(MaskVT, V1);
+ Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
}
}
Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Thu Jan 31 03:41:10 2019
@@ -596,8 +596,7 @@ define <2 x i64> @G(i64* %ptr) nounwind
;
; X64-LABEL: G:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
Modified: llvm/trunk/test/CodeGen/X86/extractelement-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extractelement-load.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extractelement-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extractelement-load.ll Thu Jan 31 03:41:10 2019
@@ -98,7 +98,8 @@ define i64 @t4(<2 x double>* %a) {
;
; X64-AVX-LABEL: t4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movq (%rdi), %rax
+; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
; X64-AVX-NEXT: retq
%b = load <2 x double>, <2 x double>* %a, align 16
%c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> <i32 1, i32 0>
Modified: llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll Thu Jan 31 03:41:10 2019
@@ -273,8 +273,7 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
;
; X32AVX1-LABEL: elt7_v8i32:
; X32AVX1: # %bb.0:
-; X32AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
+; X32AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X32AVX1-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/insert-loaded-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-loaded-scalar.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-loaded-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-loaded-scalar.ll Thu Jan 31 03:41:10 2019
@@ -180,8 +180,7 @@ define <2 x i64> @load64_ins_eltc_v2i64(
;
; AVX1-LABEL: load64_ins_eltc_v2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load64_ins_eltc_v2i64:
Modified: llvm/trunk/test/CodeGen/X86/insertelement-var-index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-var-index.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-var-index.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertelement-var-index.ll Thu Jan 31 03:41:10 2019
@@ -205,8 +205,7 @@ define <2 x i64> @load_i64_v2i64(i64* %p
;
; AVX1-LABEL: load_i64_v2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_i64_v2i64:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Thu Jan 31 03:41:10 2019
@@ -1266,8 +1266,7 @@ define <2 x i64> @insert_dup_mem_v2i64(i
;
; AVX1-LABEL: insert_dup_mem_v2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v2i64:
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=352743&r1=352742&r2=352743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Thu Jan 31 03:41:10 2019
@@ -582,8 +582,7 @@ define <4 x i32> @load_splat_4i32_2i32_0
;
; AVX1-LABEL: load_splat_4i32_2i32_0101:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_4i32_2i32_0101:
@@ -610,8 +609,7 @@ define <8 x i32> @load_splat_8i32_2i32_0
;
; AVX1-LABEL: load_splat_8i32_2i32_0101:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -641,8 +639,7 @@ define <16 x i32> @load_splat_16i32_2i32
;
; AVX1-LABEL: load_splat_16i32_2i32_0101:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps %ymm0, %ymm1
; AVX1-NEXT: retq
More information about the llvm-commits mailing list