[PATCH] D53737: [x86] commute blendvb with constant condition op to allow load folding
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 26 08:02:14 PDT 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL345390: [x86] commute blendvb with constant condition op to allow load folding (authored by spatel).
Changed prior to commit:
https://reviews.llvm.org/D53737?vs=171209&id=171307#toc
Repository:
rL LLVM
https://reviews.llvm.org/D53737
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -10068,6 +10068,15 @@
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ // x86 allows load folding with blendvb from the 2nd source operand. But
+ // we are still using LLVM select here (see comment below), so that's V1.
+ // If V2 can be load-folded and V1 cannot be load-folded, then commute to
+ // allow that load-folding possibility.
+ if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
+
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -601,17 +601,15 @@
; SSE41-LABEL: load_fold_pblendvb:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: movdqa (%rdi), %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT: pblendvb %xmm0, (%rdi), %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: load_fold_pblendvb:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vmovdqa (%rdi), %xmm1
-; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX1OR2-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: load_fold_pblendvb:
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1656,9 +1656,8 @@
;
; AVX2-LABEL: load_fold_pblendvb:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa (%rdi), %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: load_fold_pblendvb:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D53737.171307.patch
Type: text/x-patch
Size: 3089 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181026/dacaec43/attachment.bin>
More information about the llvm-commits mailing list