[llvm] d44b31e - [DAGCombine] Allow DAGCombine to remove dead masked stores.
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 13 08:30:22 PST 2023
Not sure if we have an isImpliedCondition equivalent in DAG, but if so,
using it would generalize the transform added here. If not, it might make
sense to add one as a utility?
Philip
On 2/13/23 08:12, Dinar Temirbulatov via llvm-commits wrote:
> Author: Dinar Temirbulatov
> Date: 2023-02-13T16:11:11Z
> New Revision: d44b31eca27ca2b4cd9fc7d44702697f00aac739
>
> URL: https://github.com/llvm/llvm-project/commit/d44b31eca27ca2b4cd9fc7d44702697f00aac739
> DIFF: https://github.com/llvm/llvm-project/commit/d44b31eca27ca2b4cd9fc7d44702697f00aac739.diff
>
> LOG: [DAGCombine] Allow DAGCombine to remove dead masked stores.
>
> Remove a dead masked store if another one has the same base pointer and mask,
> or the following store has an all-true constant mask and a size that is equal
> to or bigger than that of the first store.
>
> Differential Revision: https://reviews.llvm.org/D143069
>
> Added:
> llvm/test/CodeGen/AArch64/sve-dead-masked-store.ll
>
> Modified:
> llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/test/CodeGen/X86/masked_store.ll
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> index 172c83024f430..ce269004000f8 100644
> --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -11361,6 +11361,23 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
> if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
> return Chain;
>
> + // Remove a masked store if base pointers and masks are equal.
> + if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
> + if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
> + MST1->isSimple() && MST1->getBasePtr() == Ptr &&
> + !MST->getBasePtr().isUndef() &&
> + ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
> + MST1->getMemoryVT().getStoreSize()) ||
> + ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
> + TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
> + MST->getMemoryVT().getStoreSize())) {
> + CombineTo(MST1, MST1->getChain());
> + if (N->getOpcode() != ISD::DELETED_NODE)
> + AddToWorklist(N);
> + return SDValue(N, 0);
> + }
> + }
> +
> // If this is a masked load with an all ones mask, we can use a unmasked load.
> // FIXME: Can we do this for indexed, compressing, or truncating stores?
> if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
>
> diff --git a/llvm/test/CodeGen/AArch64/sve-dead-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-dead-masked-store.ll
> new file mode 100644
> index 0000000000000..b1b4d9d73880e
> --- /dev/null
> +++ b/llvm/test/CodeGen/AArch64/sve-dead-masked-store.ll
> @@ -0,0 +1,77 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
> +
> +define void @dead_masked_store(<vscale x 4 x i32> %val, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: st1w { z0.s }, p0, [x0]
> +; CHECK-NEXT: ret
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + ret void
> +}
> +
> +define void @dead_masked_store_alltrue_same(<vscale x 4 x i32> %val, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store_alltrue_same:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: ptrue p0.s
> +; CHECK-NEXT: st1w { z0.s }, p0, [x0]
> +; CHECK-NEXT: ret
> + %alltrue.ins = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
> + %alltrue = shufflevector <vscale x 4 x i1> %alltrue.ins, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %alltrue)
> + ret void
> +}
> +
> +define void @dead_masked_store_alltrue_bigger(<vscale x 4 x i16> %val, <vscale x 4 x i32> %val1, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store_alltrue_bigger:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: ptrue p0.s
> +; CHECK-NEXT: st1w { z1.s }, p0, [x0]
> +; CHECK-NEXT: ret
> + %alltrue.ins = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
> + %alltrue = shufflevector <vscale x 4 x i1> %alltrue.ins, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
> + call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val1, ptr %a, i32 4, <vscale x 4 x i1> %alltrue)
> + ret void
> +}
> +
> +define void @dead_masked_store_alltrue_smaller(<vscale x 4 x i32> %val, <vscale x 4 x i16> %val1, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store_alltrue_smaller:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: ptrue p1.s
> +; CHECK-NEXT: st1w { z0.s }, p0, [x0]
> +; CHECK-NEXT: st1h { z1.s }, p1, [x0]
> +; CHECK-NEXT: ret
> + %alltrue.ins = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
> + %alltrue = shufflevector <vscale x 4 x i1> %alltrue.ins, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %val1, ptr %a, i32 4, <vscale x 4 x i1> %alltrue)
> + ret void
> +}
> +
> +define void @dead_masked_store_same_mask_smaller_type(<vscale x 4 x i32> %val, <vscale x 4 x i16> %val1, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store_same_mask_smaller_type:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: st1w { z0.s }, p0, [x0]
> +; CHECK-NEXT: st1h { z1.s }, p0, [x0]
> +; CHECK-NEXT: ret
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %val1, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + ret void
> +}
> +
> +define void @dead_masked_store_same_mask_bigger_type(<vscale x 4 x i16> %val, <vscale x 4 x i32> %val1, ptr %a, <vscale x 4 x i1> %mask) {
> +; CHECK-LABEL: dead_masked_store_same_mask_bigger_type:
> +; CHECK: // %bb.0:
> +; CHECK-NEXT: st1h { z0.s }, p0, [x0]
> +; CHECK-NEXT: st1w { z1.s }, p0, [x0]
> +; CHECK-NEXT: ret
> + call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %val, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val1, ptr %a, i32 4, <vscale x 4 x i1> %mask)
> + ret void
> +}
> +
> +declare void @llvm.masked.store.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>*, i32, <vscale x 4 x i1>)
> +declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
>
> diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
> index 3da5fd3acc32b..5e99f7c66a4c7 100644
> --- a/llvm/test/CodeGen/X86/masked_store.ll
> +++ b/llvm/test/CodeGen/X86/masked_store.ll
> @@ -5564,7 +5564,6 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask)
> ;
> ; AVX1OR2-LABEL: PR11210:
> ; AVX1OR2: ## %bb.0:
> -; AVX1OR2-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi)
> ; AVX1OR2-NEXT: vmaskmovps %xmm1, %xmm2, (%rdi)
> ; AVX1OR2-NEXT: retq
> ;
> @@ -5572,12 +5571,10 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask)
> ; AVX512F: ## %bb.0:
> ; AVX512F-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
> ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
> -; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
> -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
> -; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k0
> +; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
> +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
> ; AVX512F-NEXT: kshiftlw $12, %k0, %k0
> ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
> -; AVX512F-NEXT: vmovups %zmm0, (%rdi) {%k1}
> ; AVX512F-NEXT: vmovups %zmm1, (%rdi) {%k1}
> ; AVX512F-NEXT: vzeroupper
> ; AVX512F-NEXT: retq
> @@ -5585,15 +5582,13 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask)
> ; AVX512VLDQ-LABEL: PR11210:
> ; AVX512VLDQ: ## %bb.0:
> ; AVX512VLDQ-NEXT: vpmovd2m %xmm2, %k1
> -; AVX512VLDQ-NEXT: vmovups %xmm0, (%rdi) {%k1}
> ; AVX512VLDQ-NEXT: vmovups %xmm1, (%rdi) {%k1}
> ; AVX512VLDQ-NEXT: retq
> ;
> ; AVX512VLBW-LABEL: PR11210:
> ; AVX512VLBW: ## %bb.0:
> -; AVX512VLBW-NEXT: vpxor %xmm3, %xmm3, %xmm3
> -; AVX512VLBW-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
> -; AVX512VLBW-NEXT: vmovups %xmm0, (%rdi) {%k1}
> +; AVX512VLBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
> +; AVX512VLBW-NEXT: vpcmpgtd %xmm2, %xmm0, %k1
> ; AVX512VLBW-NEXT: vmovups %xmm1, (%rdi) {%k1}
> ; AVX512VLBW-NEXT: retq
> ;
> @@ -5601,7 +5596,6 @@ define void @PR11210(<4 x float> %x, ptr %ptr, <4 x float> %y, <2 x i64> %mask)
> ; X86-AVX512: ## %bb.0:
> ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
> ; X86-AVX512-NEXT: vpmovd2m %xmm2, %k1
> -; X86-AVX512-NEXT: vmovups %xmm0, (%eax) {%k1}
> ; X86-AVX512-NEXT: vmovups %xmm1, (%eax) {%k1}
> ; X86-AVX512-NEXT: retl
> %bc = bitcast <2 x i64> %mask to <4 x i32>
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list