[llvm] 6183665 - [X86] combineEXTRACT_SUBVECTOR - fold extractions from UNPCK nodes.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 05:52:30 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-14T13:50:25+01:00
New Revision: 618366546d37a47d907226fca0e3d54e18938497
URL: https://github.com/llvm/llvm-project/commit/618366546d37a47d907226fca0e3d54e18938497
DIFF: https://github.com/llvm/llvm-project/commit/618366546d37a47d907226fca0e3d54e18938497.diff
LOG: [X86] combineEXTRACT_SUBVECTOR - fold extractions from UNPCK nodes.
Allow subvector extraction as long as at least one operand extraction is free.
Refactor existing cases into a switch statement to allow easier reuse and future expansion.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2759252693f9f8..999742a4089269 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57204,24 +57204,36 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext);
}
- if ((InOpcode == X86ISD::CMPP || InOpcode == X86ISD::PCMPEQ ||
- InOpcode == X86ISD::PCMPGT) &&
- (IsExtractFree(InVec.getOperand(0)) ||
- IsExtractFree(InVec.getOperand(1))) &&
- SizeInBits == 128) {
- SDValue Ext0 =
- extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
- SDValue Ext1 =
- extractSubVector(InVec.getOperand(1), IdxVal, DAG, DL, SizeInBits);
- if (InOpcode == X86ISD::CMPP)
- return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, InVec.getOperand(2));
- return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1);
- }
- if (InOpcode == X86ISD::MOVDDUP &&
- (SizeInBits == 128 || SizeInBits == 256)) {
- SDValue Ext0 =
- extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
- return DAG.getNode(InOpcode, DL, VT, Ext0);
+
+ if (SizeInBits == 128 || SizeInBits == 256) {
+ switch (InOpcode) {
+ case X86ISD::MOVDDUP:
+ return DAG.getNode(
+ InOpcode, DL, VT,
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits));
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
+ if (IsExtractFree(InVec.getOperand(0)) ||
+ IsExtractFree(InVec.getOperand(1)))
+ return DAG.getNode(InOpcode, DL, VT,
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG,
+ DL, SizeInBits),
+ extractSubVector(InVec.getOperand(1), IdxVal, DAG,
+ DL, SizeInBits));
+ break;
+ case X86ISD::CMPP:
+ if (IsExtractFree(InVec.getOperand(0)) ||
+ IsExtractFree(InVec.getOperand(1)))
+ return DAG.getNode(InOpcode, DL, VT,
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG,
+ DL, SizeInBits),
+ extractSubVector(InVec.getOperand(1), IdxVal, DAG,
+ DL, SizeInBits),
+ InVec.getOperand(2));
+ break;
+ }
}
}
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 9290f9f17b0532..4550e53a9e7493 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -4649,12 +4649,11 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2
; AVX-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[1,3],ymm2[4,4],ymm1[5,7]
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[0,2,1,3]
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vunpcklps {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; AVX-NEXT: vpaddb 16(%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rdx), %xmm2, %xmm2
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
@@ -5473,12 +5472,9 @@ define void @vec384_i64_widen_to_i192_factor3_broadcast_to_v2i192_factor2(ptr %i
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[2],ymm0[2]
; AVX-NEXT: vmovaps 32(%rdx), %ymm2
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index c0fa13f1a30084..6090aa06deb5bc 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -3743,15 +3743,14 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX-NEXT: vbroadcastss (%rdi), %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX-NEXT: vmovaps 48(%rdi), %xmm1
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
-; AVX-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[1,3],ymm2[4,4],ymm1[5,7]
-; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[0,2,1,3]
-; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vunpcklps {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
-; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX-NEXT: vmovaps 48(%rdi), %xmm2
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm3 = mem[0,1,0,1]
+; AVX-NEXT: vshufps {{.*#+}} ymm2 = ymm3[0,0],ymm2[1,3],ymm3[4,4],ymm2[5,7]
+; AVX-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2,1,3]
+; AVX-NEXT: vextractf128 $1, %ymm2, %xmm2
+; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
-; AVX-NEXT: vpaddb (%rsi), %xmm2, %xmm2
+; AVX-NEXT: vpaddb (%rsi), %xmm3, %xmm2
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, 32(%rdx)
; AVX-NEXT: vmovdqa %xmm2, (%rdx)
@@ -4382,14 +4381,10 @@ define void @vec384_i64_widen_to_i192_factor3_broadcast_to_v2i192_factor2(ptr %i
;
; AVX-LABEL: vec384_i64_widen_to_i192_factor3_broadcast_to_v2i192_factor2:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %ymm0
-; AVX-NEXT: vmovdqa (%rdi), %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],mem[4,5,6,7]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[2],ymm0[2]
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],mem[4,5,6,7]
; AVX-NEXT: vmovaps 32(%rsi), %ymm2
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vmovaps %ymm2, 32(%rdx)
More information about the llvm-commits mailing list