[llvm] 19f657d - [X86] combineToExtendBoolVectorInReg - use broadcast on AVX2+ targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 08:49:48 PST 2024
Author: Simon Pilgrim
Date: 2024-11-08T16:49:29Z
New Revision: 19f657d55d679cc3949e9e4c1a5bf76cc4c031b1
URL: https://github.com/llvm/llvm-project/commit/19f657d55d679cc3949e9e4c1a5bf76cc4c031b1
DIFF: https://github.com/llvm/llvm-project/commit/19f657d55d679cc3949e9e4c1a5bf76cc4c031b1.diff
LOG: [X86] combineToExtendBoolVectorInReg - use broadcast on AVX2+ targets
Make use of AVX2 broadcasts to splat the source integer across all lanes, simplifying the per-lane byte shuffles.
Prep work to avoid a regression in the fix for #66150
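As a minimal standalone sketch (not part of the patch): for the i32 -> v32i8 case (Scale = 4, EltSizeInBits = 8) the shuffle-mask arithmetic works out as below. The constants mirror the patch; the driver program itself is illustrative only.

// Illustration (not from the patch) of the byte-shuffle masks built by
// combineToExtendBoolVectorInReg for the i32 -> v32i8 case.
#include <cstdio>
#include <vector>

int main() {
  const unsigned EltSizeInBits = 8; // bits per destination i8 element
  const unsigned Scale = 4;         // 32-bit scalar splits into 4 byte groups

  std::vector<int> ScalarMask, BroadcastMask;
  for (unsigned i = 0; i != Scale; ++i) {
    // SCALAR_TO_VECTOR form: every group reads byte i of the single copy,
    // so the upper 128-bit half needs a cross-lane vpermq before vpshufb.
    ScalarMask.insert(ScalarMask.end(), EltSizeInBits, i);
    // Broadcast form: group i reads byte i of the i-th splatted copy, so the
    // byte is already inside the 128-bit lane that vpshufb reads from.
    int Offset = i * EltSizeInBits;
    BroadcastMask.insert(BroadcastMask.end(), EltSizeInBits, i + Offset);
  }

  printf("scalar_to_vector mask:");
  for (int M : ScalarMask) printf(" %d", M);
  printf("\nbroadcast mask:       ");
  for (int M : BroadcastMask) printf(" %d", M);
  printf("\n");
  return 0;
}

Running this prints the old mask 0,...,0,1,...,1,2,...,2,3,...,3 (which legalizes into the previous vpermq+vpshufb sequence) and the new mask 0,...,0,9,...,9,18,...,18,27,...,27, matching the vpshufb indices in the updated AVX2 test checks below.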
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/vector-sext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ab879e9036b3e..33bfb32c0b2234 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46183,11 +46183,17 @@ static SDValue combineToExtendBoolVectorInReg(
     assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
     unsigned Scale = NumElts / EltSizeInBits;
     EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
-    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+    bool UseBroadcast = Subtarget.hasInt256() &&
+                        (!BroadcastVT.is128BitVector() || isa<LoadSDNode>(N00));
+    Vec = UseBroadcast
+              ? DAG.getSplat(BroadcastVT, DL, N00)
+              : DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
     Vec = DAG.getBitcast(VT, Vec);
-    for (unsigned i = 0; i != Scale; ++i)
-      ShuffleMask.append(EltSizeInBits, i);
+    for (unsigned i = 0; i != Scale; ++i) {
+      int Offset = UseBroadcast ? (i * EltSizeInBits) : 0;
+      ShuffleMask.append(EltSizeInBits, i + Offset);
+    }
     Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
   } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
              (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index e9e5bec4a4b15f..efb7ba393721bc 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -365,8 +365,8 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -635,12 +635,12 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index 9f6de9a5c5d9b2..7863de8754fa0b 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -456,8 +456,8 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -806,14 +806,14 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq %rdi, %xmm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index c27b5b289e1fa9..33a0946c8fbe94 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -256,8 +256,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX2-LABEL: bitcast_i32_32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 7673f09b2ba296..8620651c1c1992 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -2596,9 +2596,8 @@ define <16 x i8> @load_sext_16i1_to_16i8(ptr%ptr) nounwind readnone {
;
; AVX2-LABEL: load_sext_16i1_to_16i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movzwl (%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -2804,9 +2803,8 @@ define <32 x i8> @load_sext_32i1_to_32i8(ptr%ptr) nounwind readnone {
;
; AVX2-LABEL: load_sext_32i1_to_32i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0