[llvm] [DAG] narrowExtractedVectorBinOp - ensure we limit late node creation to LegalOperations only (PR #72130)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 13 08:14:38 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Avoids infinite issues in some upcoming patches to help D152928
---
Full diff: https://github.com/llvm/llvm-project/pull/72130.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+2-1)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+10)
- (modified) llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll (+6-7)
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+10-7)
- (modified) llvm/test/CodeGen/X86/kshift.ll (+7-5)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a867d88f76c0cf6..f54d45d883d1927 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23960,7 +23960,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
+ LegalOperations))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3d44c50b44e6234..99f225a7d702981 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41371,8 +41371,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::UNPCKH:
case X86ISD::BLENDI:
// Integer ops.
+ case X86ISD::PCMPEQ:
+ case X86ISD::PCMPGT:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
+ case X86ISD::VSHLV:
+ case X86ISD::VSRLV:
+ case X86ISD::VSRAV:
+ // Float Ops.
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
+ case X86ISD::FMAXC:
+ case X86ISD::FMINC:
// Horizontal Ops.
case X86ISD::HADD:
case X86ISD::HSUB:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index df35b4ecb3d6623..5e477e8947d1b86 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1860,15 +1860,14 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-LABEL: umaxv_v3i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov v3.16b, v0.16b
-; CHECK-NEXT: mov v4.16b, v2.16b
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v3.d[1], v1.d[0]
-; CHECK-NEXT: mov v4.d[1], xzr
-; CHECK-NEXT: cmhi v3.2d, v3.2d, v4.2d
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: mov v3.d[1], xzr
+; CHECK-NEXT: cmhi v3.2d, v0.2d, v3.2d
; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: bif v0.8b, v2.8b, v3.8b
+; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT: and v1.8b, v1.8b, v4.8b
; CHECK-NEXT: cmhi d2, d0, d1
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -1930,4 +1929,4 @@ define i128 @umaxv_v2i128(<2 x i128> %a) {
entry:
%arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
ret i128 %arg1
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 40cbbfe2c14e5d4..4a2dd7673f4e767 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1077,11 +1077,10 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin
; KNL-LABEL: test_extractelement_v64i1:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1113,11 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind
; KNL-LABEL: extractelement_v64i1_alt:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 080ad3a7b0b463e..69f893ac829d603 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1073,18 +1073,21 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
;
; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: subq $40, %rsp
-; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; CHECK-AVX2-NEXT: vpsllvd %xmm0, %xmm1, %xmm0
-; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-AVX2-NEXT: subq $56, %rsp
+; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,0,0,2,2,0,0]
+; CHECK-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
+; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2 at PLT
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2 at PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2 at PLT
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1097,7 +1100,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-AVX2-NEXT: callq __truncsfhf2 at PLT
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; CHECK-AVX2-NEXT: addq $40, %rsp
+; CHECK-AVX2-NEXT: addq $56, %rsp
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll
index f4efacc1946cff9..0acf82f5a144a2e 100644
--- a/llvm/test/CodeGen/X86/kshift.ll
+++ b/llvm/test/CodeGen/X86/kshift.ll
@@ -270,10 +270,11 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
@@ -563,13 +564,14 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-LABEL: kshiftr_v64i1_63:
; KNL: # %bb.0:
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k1
-; KNL-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
``````````
</details>
https://github.com/llvm/llvm-project/pull/72130
More information about the llvm-commits
mailing list