[llvm] r304153 - [DAGCombiner] fix load narrowing transform to exclude loads with extension
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon May 29 06:24:59 PDT 2017
Author: spatel
Date: Mon May 29 08:24:58 2017
New Revision: 304153
URL: http://llvm.org/viewvc/llvm-project?rev=304153&view=rev
Log:
[DAGCombiner] fix load narrowing transform to exclude loads with extension
The extending load possibility was missed in:
https://reviews.llvm.org/rL304072
We might want to handle this cases as a follow-up, but bailing out for now
to avoid miscompiling.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/vector-sext.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=304153&r1=304152&r2=304153&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon May 29 08:24:58 2017
@@ -14567,7 +14567,8 @@ static SDValue narrowExtractedVectorLoad
// extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
+ !ExtIdx)
return SDValue();
// The narrow load will be offset from the base address of the old load if
Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=304153&r1=304152&r2=304153&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Mon May 29 08:24:58 2017
@@ -1749,6 +1749,62 @@ entry:
ret <4 x i64> %Y
}
+define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbq 3(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm1
+; SSE2-NEXT: movsbq 2(%rdi), %rax
+; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i8_to_4i64_extract:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbq 3(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm1
+; SSSE3-NEXT: movsbq 2(%rdi), %rax
+; SSSE3-NEXT: movq %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_sext_4i8_to_4i64_extract:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0
+; X32-SSE41-NEXT: retl
+ %ld = load <4 x i8>, <4 x i8>* %ptr
+ %sext = sext <4 x i8> %ld to <4 x i64>
+ %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x i64> %extract
+}
+
define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-LABEL: load_sext_8i1_to_8i16:
; SSE2: # BB#0: # %entry
More information about the llvm-commits
mailing list