[llvm] 843d43e - [X86] computeKnownBitsForTargetNode - add X86ISD::VBROADCAST_LOAD handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 21 03:48:19 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-21T11:48:01+01:00
New Revision: 843d43e62ae132b723a2ba9606299ba2159c8949
URL: https://github.com/llvm/llvm-project/commit/843d43e62ae132b723a2ba9606299ba2159c8949
DIFF: https://github.com/llvm/llvm-project/commit/843d43e62ae132b723a2ba9606299ba2159c8949.diff
LOG: [X86] computeKnownBitsForTargetNode - add X86ISD::VBROADCAST_LOAD handling
This requires us to override the isTargetCanonicalConstantNode callback introduced in D128144 so that, to prevent infinite loops, we can recognise the various cases where a VBROADCAST_LOAD constant is being reused at different vector widths.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
llvm/test/CodeGen/X86/var-permute-256.ll
llvm/test/CodeGen/X86/vector-pack-128.ll
llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 289c2858b6ad..2c192db9688c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36732,6 +36732,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.setAllZero();
break;
}
+ case X86ISD::VBROADCAST_LOAD: {
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op, BitWidth, UndefElts, EltBits,
+ /*AllowWholeUndefs*/ false,
+ /*AllowPartialUndefs*/ false)) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ for (unsigned I = 0; I != NumElts; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ if (UndefElts[I]) {
+ Known.resetAll();
+ break;
+ }
+ KnownBits Known2 = KnownBits::makeConstant(EltBits[I]);
+ Known = KnownBits::commonBits(Known, Known2);
+ }
+ return;
+ }
+ break;
+ }
}
// Handle target shuffles.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7f2f2a4c76ae..af110884049b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1160,6 +1160,19 @@ namespace llvm {
APInt &UndefElts,
unsigned Depth) const override;
+ bool isTargetCanonicalConstantNode(SDValue Op) const override {
+ // Peek through bitcasts/extracts/inserts to see if we have a broadcast
+ // vector from memory.
+ while (Op.getOpcode() == ISD::BITCAST ||
+ Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
+ (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Op.getOperand(0).isUndef()))
+ Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
+
+ return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ TargetLowering::isTargetCanonicalConstantNode(Op);
+ }
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 85ed99831e5e..90889f9c053e 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -222,9 +222,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm1
-; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll
index 5ffeb90aa634..6af3126861bd 100644
--- a/llvm/test/CodeGen/X86/var-permute-256.ll
+++ b/llvm/test/CodeGen/X86/var-permute-256.ll
@@ -1184,8 +1184,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [7,7,7,7]
-; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: andl $3, %eax
@@ -1208,8 +1206,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr
; AVX512-NEXT: andq $-32, %rsp
; AVX512-NEXT: subq $64, %rsp
; AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm3 = [7,7,7,7]
-; AVX512-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vmovd %xmm1, %eax
; AVX512-NEXT: vmovaps %ymm0, (%rsp)
; AVX512-NEXT: andl $3, %eax
@@ -1233,7 +1229,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr
; AVX512VL-NEXT: movq %rsp, %rbp
; AVX512VL-NEXT: andq $-32, %rsp
; AVX512VL-NEXT: subq $64, %rsp
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %eax
; AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
; AVX512VL-NEXT: andl $3, %eax
diff --git a/llvm/test/CodeGen/X86/vector-pack-128.ll b/llvm/test/CodeGen/X86/vector-pack-128.ll
index 4f6c795404a1..b88bb4641b4d 100644
--- a/llvm/test/CodeGen/X86/vector-pack-128.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-128.ll
@@ -160,9 +160,7 @@ define <8 x i16> @concat_trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwi
; AVX2-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_trunc_packssdw_128:
diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
index 608522e88814..d24fc9acd390 100644
--- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
@@ -3076,8 +3076,6 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a
;
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
More information about the llvm-commits
mailing list