[PATCH] D37446: [x86] eliminate unnecessary vector compare for AVX masked store
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 16:25:39 PDT 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL313089: [x86] eliminate unnecessary vector compare for AVX masked store (authored by spatel).
Changed prior to commit:
https://reviews.llvm.org/D37446?vs=114028&id=114936#toc
Repository:
rL LLVM
https://reviews.llvm.org/D37446
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/masked_memop.ll
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -33145,8 +33145,33 @@
if (Mst->isCompressingStore())
return SDValue();
- if (!Mst->isTruncatingStore())
- return reduceMaskedStoreToScalarStore(Mst, DAG);
+ if (!Mst->isTruncatingStore()) {
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+ return ScalarStore;
+
+ // If the mask is checking (0 > X), we're creating a vector with all-zeros
+ // or all-ones elements based on the sign bits of X. AVX1 masked store only
+ // cares about the sign bit of each mask element, so eliminate the compare:
+ // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
+ // Note that by waiting to match an x86-specific PCMPGT node, we're
+ // eliminating potentially more complex matching of a setcc node which has
+ // a full range of predicates.
+ SDValue Mask = Mst->getMask();
+ if (Mask.getOpcode() == X86ISD::PCMPGT &&
+ ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
+ assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
+ "Unexpected type for PCMPGT");
+ return DAG.getMaskedStore(
+ Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
+ Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+ }
+
+ // TODO: AVX512 targets should also be able to simplify something like the
+ // pattern above, but that pattern will be different. It will either need to
+ // match setcc more generally or match PCMPGTM later (in tablegen?).
+
+ return SDValue();
+ }
// Resolve truncating stores.
EVT VT = Mst->getValue().getValueType();
Index: llvm/trunk/test/CodeGen/X86/masked_memop.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll
@@ -1140,21 +1140,18 @@
ret <8 x double> %res
}
-; FIXME: The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
+; The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
+; FIXME: The AVX512 code should be improved to use 'vpmovd2m'. Add tests for 512-bit vectors when implementing that.
define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
; AVX-LABEL: trunc_mask:
; AVX: ## BB#0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_mask:
; AVX512F: ## BB#0:
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX512F-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: trunc_mask:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37446.114936.patch
Type: text/x-patch
Size: 3116 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170912/541f07a4/attachment.bin>
More information about the llvm-commits mailing list