[llvm] [X86] getTargetConstantBitsFromNode - handle EXTRACT_SUBVECTOR through bitcasts (PR #143886)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 05:51:55 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Generalize the extraction index/width calculation to account for any change in element type introduced by bitcasts.
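A minimal standalone sketch of the remapped index arithmetic (the concrete types here, a v4i64 source, a v2i64 extract, and 32-bit requested elements, are illustrative and not taken from the patch):

```cpp
// Standalone sketch of the generalized subvector index arithmetic
// (illustrative values only). We request 32-bit constant elements from an
// extract_subvector whose own element type is i64, so the old
// EltSizeInBits == VT.getScalarSizeInBits() bail-out would have rejected it.
#include <cassert>
#include <cstdio>

int main() {
  const unsigned EltSizeInBits = 32; // caller-requested element width
  const unsigned SrcBits = 256;      // source vector: v4i64 (256 bits)
  const unsigned SubBits = 128;      // extracted subvector: v2i64
  const unsigned SubScalarBits = 64; // scalar width of the extract's VT (i64)
  const unsigned ExtractIdx = 1;     // extract_subvector index operand

  unsigned NumSrcElts = SrcBits / EltSizeInBits; // 8 x i32
  unsigned NumSubElts = SubBits / EltSizeInBits; // 4 x i32
  unsigned BaseOfs = ExtractIdx * SubScalarBits; // bit offset: 64
  unsigned BaseIdx = BaseOfs / EltSizeInBits;    // first i32 element: 2
  assert(BaseOfs % EltSizeInBits == 0 && "Bad subvector index");

  // Keep i32 elements [BaseIdx, BaseIdx + NumSubElts) of the source bits.
  printf("keep elements [%u, %u) of %u\n", BaseIdx, BaseIdx + NumSubElts,
         NumSrcElts); // keep elements [2, 6) of 8
  return 0;
}
```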
---
Full diff: https://github.com/llvm/llvm-project/pull/143886.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+19-19)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+18-23)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll (+13-17)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0fbf55e97be9..b4670e270141f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5242,25 +5242,25 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
// Extract constant bits from a subvector's source.
- if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- // TODO - support extract_subvector through bitcasts.
- if (EltSizeInBits != VT.getScalarSizeInBits())
- return false;
-
- if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
- UndefElts, EltBits, AllowWholeUndefs,
- AllowPartialUndefs)) {
- EVT SrcVT = Op.getOperand(0).getValueType();
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
- unsigned NumSubElts = VT.getVectorNumElements();
- unsigned BaseIdx = Op.getConstantOperandVal(1);
- UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
- if ((BaseIdx + NumSubElts) != NumSrcElts)
- EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
- if (BaseIdx != 0)
- EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
- return true;
- }
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, UndefElts,
+ EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ unsigned NumSrcElts = SrcVT.getSizeInBits() / EltSizeInBits;
+ unsigned NumSubElts = VT.getSizeInBits() / EltSizeInBits;
+ unsigned BaseOfs = Op.getConstantOperandVal(1) * VT.getScalarSizeInBits();
+ unsigned BaseIdx = BaseOfs / EltSizeInBits;
+ assert((SrcVT.getSizeInBits() % EltSizeInBits) == 0 &&
+ (VT.getSizeInBits() % EltSizeInBits) == 0 &&
+ (BaseOfs % EltSizeInBits) == 0 && "Bad subvector index");
+
+ UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
+ if ((BaseIdx + NumSubElts) != NumSrcElts)
+ EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
+ if (BaseIdx != 0)
+ EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
+ return true;
}
// Extract constant bits from shuffle node sources.
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 7ad9fb0c27170..4311a64f86a04 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -3567,14 +3567,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa %xmm1, (%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX-NEXT: vmovdqa %xmm1, (%rcx)
; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX-NEXT: retq
;
@@ -3757,14 +3756,14 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddb 32(%rdx), %xmm3, %xmm3
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3],xmm0[4],xmm2[5,6],xmm0[7]
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
-; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
+; AVX-NEXT: vmovdqa %xmm3, 32(%rcx)
; AVX-NEXT: retq
;
; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
@@ -3955,10 +3954,9 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4181,17 +4179,16 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5],xmm2[6],xmm1[7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2],xmm3[3,4,5,6,7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
-; AVX-NEXT: vpaddb 16(%rdx), %xmm2, %xmm2
+; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
-; AVX-NEXT: vmovdqa %xmm2, 16(%rcx)
-; AVX-NEXT: vmovdqa %xmm0, 32(%rcx)
+; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
; AVX-NEXT: retq
;
; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
@@ -4379,10 +4376,9 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4517,10 +4513,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vmovaps 32(%rdx), %ymm2
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
; AVX-NEXT: vmovdqa %xmm1, (%rcx)
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 3bef834bbd902..38ed3ec0a975e 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -2868,14 +2868,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX-NEXT: vmovdqa %xmm1, (%rdx)
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
+; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT: vmovdqa %xmm1, (%rdx)
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
; AVX-NEXT: retq
;
@@ -2986,7 +2985,8 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
; AVX-NEXT: vmovdqa (%rdi), %xmm2
; AVX-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2,3],xmm0[4],xmm3[5,6],xmm0[7]
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -3135,9 +3135,8 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
-; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
@@ -3319,13 +3318,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
-; AVX-NEXT: vmovdqa %xmm1, (%rdx)
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT: vmovdqa %xmm1, (%rdx)
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
; AVX-NEXT: retq
;
@@ -3469,9 +3467,8 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
-; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
@@ -3584,9 +3581,8 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vmovaps 32(%rsi), %ymm2
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vmovaps %ymm2, 32(%rdx)
``````````
https://github.com/llvm/llvm-project/pull/143886