[PATCH] D87788: [DAGCombiner] Teach visitMLOAD to replace an all ones mask with an unmasked load
Craig Topper via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 16 13:27:54 PDT 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rG89ee4c0314bd: [DAGCombiner] Teach visitMLOAD to replace an all ones mask with an unmasked load (authored by craig.topper).
Changed prior to commit:
https://reviews.llvm.org/D87788?vs=292311&id=292328#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D87788/new/
https://reviews.llvm.org/D87788
Files:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/masked_load.ll
Index: llvm/test/CodeGen/X86/masked_load.ll
===================================================================
--- llvm/test/CodeGen/X86/masked_load.ll
+++ llvm/test/CodeGen/X86/masked_load.ll
@@ -6171,25 +6171,10 @@
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: mload_constmask_v4f32_all:
-; AVX1OR2: ## %bb.0:
-; AVX1OR2-NEXT: vmovups (%rdi), %xmm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512F-LABEL: mload_constmask_v4f32_all:
-; AVX512F: ## %bb.0:
-; AVX512F-NEXT: movw $15, %ax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
-; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: mload_constmask_v4f32_all:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: kxnorw %k0, %k0, %k1
-; AVX512VL-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
-; AVX512VL-NEXT: retq
+; AVX-LABEL: mload_constmask_v4f32_all:
+; AVX: ## %bb.0:
+; AVX-NEXT: vmovups (%rdi), %xmm0
+; AVX-NEXT: retq
%res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
ret <4 x float> %res
}
@@ -6573,7 +6558,7 @@
ret <8 x double> %res
}
-; FIXME: We should be able to detect the mask is all ones after type
+; Make sure we detect the mask is all ones after type
; legalization to use an unmasked load for some of the avx512 instructions.
define <16 x double> @mload_constmask_v16f64_allones_split(<16 x double>* %addr, <16 x double> %dst) {
; SSE-LABEL: mload_constmask_v16f64_allones_split:
@@ -6611,29 +6596,26 @@
;
; AVX512F-LABEL: mload_constmask_v16f64_allones_split:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: kxnorw %k0, %k0, %k1
-; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: movb $85, %al
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm1 {%k1}
+; AVX512F-NEXT: vmovups (%rdi), %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLDQ-LABEL: mload_constmask_v16f64_allones_split:
; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: kxnorw %k0, %k0, %k1
-; AVX512VLDQ-NEXT: vmovupd (%rdi), %zmm0 {%k1}
; AVX512VLDQ-NEXT: movb $85, %al
; AVX512VLDQ-NEXT: kmovw %eax, %k1
; AVX512VLDQ-NEXT: vmovupd 64(%rdi), %zmm1 {%k1}
+; AVX512VLDQ-NEXT: vmovups (%rdi), %zmm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: mload_constmask_v16f64_allones_split:
; AVX512VLBW: ## %bb.0:
-; AVX512VLBW-NEXT: kxnorw %k0, %k0, %k1
-; AVX512VLBW-NEXT: vmovupd (%rdi), %zmm0 {%k1}
; AVX512VLBW-NEXT: movb $85, %al
; AVX512VLBW-NEXT: kmovd %eax, %k1
; AVX512VLBW-NEXT: vmovupd 64(%rdi), %zmm1 {%k1}
+; AVX512VLBW-NEXT: vmovups (%rdi), %zmm0
; AVX512VLBW-NEXT: retq
%res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %addr, i32 4, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x double> %dst)
ret <16 x double> %res
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9272,6 +9272,16 @@
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // If this is a masked load with an all ones mask, we can use an unmasked load.
+ // FIXME: Can we do this for indexed, expanding, or extending loads?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MLD->isUnindexed() && !MLD->isExpandingLoad() &&
+ MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
+ MLD->getBasePtr(), MLD->getMemOperand());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D87788.292328.patch
Type: text/x-patch
Size: 4100 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200916/b929be39/attachment.bin>
More information about the llvm-commits mailing list