[llvm] 503ba62 - [X86] combineEXTRACT_SUBVECTOR - fold extractions from BLENDI nodes.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 05:52:32 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-14T13:50:26+01:00
New Revision: 503ba6262ed4c5d93c61392604c98cd9b3ae0e6c
URL: https://github.com/llvm/llvm-project/commit/503ba6262ed4c5d93c61392604c98cd9b3ae0e6c
DIFF: https://github.com/llvm/llvm-project/commit/503ba6262ed4c5d93c61392604c98cd9b3ae0e6c.diff
LOG: [X86] combineEXTRACT_SUBVECTOR - fold extractions from BLENDI nodes.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 999742a408926..7249789a8c074 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57233,6 +57233,19 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
DL, SizeInBits),
InVec.getOperand(2));
break;
+ case X86ISD::BLENDI:
+ if (IsExtractFree(InVec.getOperand(0)) ||
+ IsExtractFree(InVec.getOperand(1))) {
+ uint64_t M = InVec.getConstantOperandVal(2) & 255;
+ M = VT.getScalarType() == MVT::i16 ? M : (M >> IdxVal);
+ return DAG.getNode(InOpcode, DL, VT,
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG,
+ DL, SizeInBits),
+ extractSubVector(InVec.getOperand(1), IdxVal, DAG,
+ DL, SizeInBits),
+ DAG.getTargetConstant(M, DL, MVT::i8));
+ }
+ break;
}
}
}
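A minimal standalone sketch of the mask-narrowing step the new X86ISD::BLENDI case performs, for readers following the hunk above. The helper name, sample immediates, and element counts below are illustrative only and are not part of the patch; the actual combine operates on SDValues inside combineEXTRACT_SUBVECTOR.

// Sketch (assumption: not LLVM code) of how the 8-bit blend immediate is
// adjusted when a subvector is extracted from a BLENDI node.
#include <cstdint>
#include <cstdio>

// For 32/64-bit element blends the BLENDI immediate has one bit per element,
// so extracting the subvector that starts at element IdxVal shifts the mask
// right by IdxVal.  For 16-bit element blends the immediate is already
// replicated per 128-bit lane, so it is reused unchanged.
static uint8_t narrowBlendMask(uint64_t Imm, unsigned IdxVal, bool Is16BitElts) {
  uint64_t M = Imm & 255;
  return static_cast<uint8_t>(Is16BitElts ? M : (M >> IdxVal));
}

int main() {
  // v8i32 blend with mask 0b10110100: extracting the high v4i32 half starts
  // at element 4, so the narrowed mask keeps bits 4..7 -> 0b1011.
  printf("%#x\n", narrowBlendMask(0xB4, /*IdxVal=*/4, /*Is16BitElts=*/false));
  // v16i16 blend: the same 8-bit immediate applies to both 128-bit lanes,
  // so the extracted half reuses it as-is.
  printf("%#x\n", narrowBlendMask(0xB4, /*IdxVal=*/8, /*Is16BitElts=*/true));
  return 0;
}

The IsExtractFree guard in the patch ensures the fold only fires when at least one blend operand is already cheap to extract, which is why the test diffs below trade ymm-wide blends plus vextractf128 for plain xmm blends.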
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 4550e53a9e749..2ea01230ca02d 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -4848,21 +4848,17 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,0,1,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5],ymm0[6],ymm3[7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3,4,5,6,7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
+; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
+; AVX-NEXT: vpaddb 16(%rdx), %xmm2, %xmm2
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
-; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
-; AVX-NEXT: vzeroupper
+; AVX-NEXT: vmovdqa %xmm2, 16(%rcx)
+; AVX-NEXT: vmovdqa %xmm0, 32(%rcx)
; AVX-NEXT: retq
;
; AVX2-SLOW-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
@@ -5049,18 +5045,14 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm3[5,6,7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpaddb 32(%rdx), %xmm3, %xmm3
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
-; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
-; AVX-NEXT: vzeroupper
+; AVX-NEXT: vmovdqa %xmm3, 32(%rcx)
; AVX-NEXT: retq
;
; AVX2-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
@@ -5191,12 +5183,10 @@ define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %i
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6],ymm2[7]
; AVX-NEXT: vmovaps 32(%rdx), %ymm2
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5],xmm3[6,7]
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
@@ -5323,18 +5313,13 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX-NEXT: vmovq {{.*#+}} xmm2 = xmm0[0],zero
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm3[6,7]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
-; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
-; AVX-NEXT: vzeroupper
+; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX-NEXT: retq
;
; AVX2-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 6090aa06deb5b..70d12b2e89770 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -3896,17 +3896,15 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
;
; AVX-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps (%rdi), %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1,2,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
-; AVX-NEXT: vbroadcastss (%rdi), %ymm2
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5],ymm2[6],ymm3[7]
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],mem[2,3,4,5,6,7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: vbroadcastss (%rdi), %ymm3
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm0
-; AVX-NEXT: vextractf128 $1, %ymm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpaddb 16(%rsi), %xmm2, %xmm2
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
@@ -4052,10 +4050,9 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1,2,3]
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5,6,7]
; AVX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -4167,10 +4164,9 @@ define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %i
; AVX-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1],xmm0[2,3,4,5,6,7]
; AVX-NEXT: vbroadcastss (%rdi), %ymm1
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6],ymm2[7]
; AVX-NEXT: vmovaps 32(%rsi), %ymm2
-; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm2, 32(%rdx)
@@ -4273,10 +4269,8 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7]
; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2