[llvm] [X86] SimplifyDemandedBitsForTargetNode - add X86ISD::BLENDI handling (PR #133102)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 08:12:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/133102.diff
4 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+29)
- (modified) llvm/test/CodeGen/X86/combine-movmsk-avx.ll (+11-57)
- (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+11-12)
- (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b128a6dadbbb6..72977923bac2b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44227,6 +44227,35 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
return false;
}
+ case X86ISD::BLENDI: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ APInt Mask = getBLENDIBlendMask(Op);
+
+ APInt DemandedEltsLHS = OriginalDemandedElts & ~Mask;
+ if (SimplifyDemandedBits(LHS, OriginalDemandedBits, DemandedEltsLHS, Known,
+ TLO, Depth + 1))
+ return true;
+
+ APInt DemandedEltsRHS = OriginalDemandedElts & Mask;
+ if (SimplifyDemandedBits(RHS, OriginalDemandedBits, DemandedEltsRHS, Known,
+ TLO, Depth + 1))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+ LHS, OriginalDemandedBits, DemandedEltsLHS, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+ RHS, OriginalDemandedBits, DemandedEltsRHS, TLO.DAG, Depth + 1);
+ if (NewLHS || NewRHS) {
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT,
+ NewLHS, NewRHS, Op.getOperand(2)));
+ }
+ break;
+ }
case X86ISD::BLENDV: {
SDValue Sel = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 6da4102bc2ecd..303873599ca52 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -75,39 +75,15 @@ define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
}
;
-; TODO - Avoid sign extension ops when just extracting the sign bits.
+; Avoid sign extension ops when just extracting the sign bits.
;
define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
-; VTEST-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; VTEST-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; MOVMSK-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskpd_cmpgt_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskpd %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%1 = icmp sgt <4 x i64> zeroinitializer, %a0
%2 = sext <4 x i1> %1 to <4 x i64>
%3 = bitcast <4 x i64> %2 to <4 x double>
@@ -116,33 +92,11 @@ define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
}
define i32 @movmskps_ashr_v8i32(<8 x i32> %a0) {
-; VTEST-AVX1-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; VTEST-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; VTEST-AVX1-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; MOVMSK-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; MOVMSK-AVX1-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskps_ashr_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%2 = bitcast <8 x i32> %1 to <8 x float>
%3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 42f09d04da26e..1ae1d61091362 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2187,15 +2187,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pcmpgtb %xmm1, %xmm3
-; SSE41-NEXT: pxor %xmm4, %xmm4
-; SSE41-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT: paddw %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5],xmm2[6],xmm4[7]
-; SSE41-NEXT: psrlw $8, %xmm2
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128]
; SSE41-NEXT: psrlw $8, %xmm3
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4,5],xmm4[6],xmm2[7]
+; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: paddb %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -2223,15 +2222,15 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4,5],xmm4[6],xmm3[7]
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index c0c93646b5aaf..1a5c3730c1839 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -273,8 +273,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
-; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
@@ -711,8 +710,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm5
-; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]
``````````
</details>
https://github.com/llvm/llvm-project/pull/133102
More information about the llvm-commits
mailing list