[PATCH] D103462: [SDAG] allow more cast folding for vector sext-of-setcc
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 1 07:26:05 PDT 2021
spatel created this revision.
spatel added reviewers: lebedev.ri, craig.topper, RKSimon.
Herald added subscribers: ecnelises, pengfei, hiraditya, mcrosier.
spatel requested review of this revision.
Herald added a project: LLVM.
This is a follow-up to D103280 <https://reviews.llvm.org/D103280> that eases the use restrictions, so we can handle the motivating case from:
https://llvm.org/PR50055
The loop code is adapted from similar use checks in ExtendUsesToFormExtLoad() and SliceUpLoad(). I did not see an easier way to skip the chain uses of a load and examine only the uses of its loaded value.
https://reviews.llvm.org/D103462
Files:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/sext-vsetcc.ll
Index: llvm/test/CodeGen/X86/sext-vsetcc.ll
===================================================================
--- llvm/test/CodeGen/X86/sext-vsetcc.ll
+++ llvm/test/CodeGen/X86/sext-vsetcc.ll
@@ -438,7 +438,7 @@
ret <8 x i32> %sext
}
-; negative test - extra use (TODO)
+; Both uses of the load can be absorbed by the zext-load, so we eliminate the explicit casts.
define <8 x i32> @PR50055(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
; SSE-LABEL: PR50055:
@@ -462,28 +462,13 @@
; SSE-NEXT: movdqa %xmm3, (%rsi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: PR50055:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, (%rsi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: PR50055:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; AVX512-NEXT: vpmovsxbd %xmm1, %ymm1
-; AVX512-NEXT: vmovdqa %ymm1, (%rsi)
-; AVX512-NEXT: retq
+; AVX-LABEL: PR50055:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm1
+; AVX-NEXT: vmovdqa %ymm1, (%rsi)
+; AVX-NEXT: retq
%load = load <8 x i8>, <8 x i8>* %src
%zext = zext <8 x i8> %load to <8 x i32>
%icmp = icmp ne <8 x i8> %load, zeroinitializer
@@ -492,6 +477,8 @@
ret <8 x i32> %zext
}
+; negative test - extra uses must be absorbable by a zext-load.
+
define <8 x i16> @multi_use_different_sizes(<8 x i8>* %src, <8 x i32>* %dst) nounwind {
; SSE-LABEL: multi_use_different_sizes:
; SSE: # %bb.0:
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10826,13 +10826,27 @@
// Match a simple, non-extended load that can be converted to a
// legal zext-load.
- // TODO: Handle more than one use if the other uses are free to zext.
// TODO: Allow widening of an existing zext-load?
- return ISD::isNON_EXTLoad(V.getNode()) &&
- ISD::isUNINDEXEDLoad(V.getNode()) &&
- cast<LoadSDNode>(V)->isSimple() &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, V.getValueType()) &&
- V.hasOneUse();
+ if (!(ISD::isNON_EXTLoad(V.getNode()) &&
+ ISD::isUNINDEXEDLoad(V.getNode()) &&
+ cast<LoadSDNode>(V)->isSimple() &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, V.getValueType())))
+ return false;
+
+ // Non-chain users of this value must either be the setcc in this
+ // sequence or zexts that can be folded into the new zext-load.
+ for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // Skip uses of the chain and the setcc.
+ SDNode *User = *UI;
+ if (UI.getUse().getResNo() != 0 || User == N0.getNode())
+ continue;
+ // Other users should get folded into the load.
+ if (User->getOpcode() != ISD::ZERO_EXTEND ||
+ User->getValueType(0) != VT)
+ return false;
+ }
+ return true;
};
if (IsFreeToZext(N00) && IsFreeToZext(N01)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D103462.348971.patch
Type: text/x-patch
Size: 4278 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210601/ca0c7a24/attachment.bin>
More information about the llvm-commits
mailing list