[llvm] r352433 - Recommit r352255 "[SelectionDAG][X86] Don't use SEXTLOAD for promoting masked loads in the type legalizer"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 28 13:38:47 PST 2019
Author: ctopper
Date: Mon Jan 28 13:38:47 2019
New Revision: 352433
URL: http://llvm.org/viewvc/llvm-project?rev=352433&view=rev
Log:
Recommit r352255 "[SelectionDAG][X86] Don't use SEXTLOAD for promoting masked loads in the type legalizer"
This did not cause the buildbot failure it was previously reverted for.
Original commit message:
I'm not sure why we were using SEXTLOAD. EXTLOAD seems more appropriate since we don't care about the upper bits.
This patch changes this and then modifies the X86 post legalization combine to emit a extending shuffle instead of a sign_extend_vector_inreg. Could maybe use an any_extend_vector_inre
On AVX512 targets I think we might be able to use a masked vpmovzx and not have to expand this at all.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/masked_load.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=352433&r1=352432&r2=352433&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Mon Jan 28 13:38:47 2019
@@ -554,7 +554,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getMask(), ExtPassThru, N->getMemoryVT(),
- N->getMemOperand(), ISD::SEXTLOAD);
+ N->getMemOperand(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352433&r1=352432&r2=352433&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 28 13:38:47 2019
@@ -37694,7 +37694,7 @@ static SDValue combineMaskedLoad(SDNode
return Blend;
}
- if (Mld->getExtensionType() != ISD::SEXTLOAD)
+ if (Mld->getExtensionType() != ISD::EXTLOAD)
return SDValue();
// Resolve extending loads.
@@ -37764,8 +37764,20 @@ static SDValue combineMaskedLoad(SDNode
Mld->getBasePtr(), NewMask, WidePassThru,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
- return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
+
+ SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio] = i;
+
+ // Can't shuffle using an illegal type.
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
+ SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT), ShuffleVec);
+ SlicedVec = DAG.getBitcast(VT, SlicedVec);
+
+ return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
}
/// If exactly one element of the mask is set for a non-truncating masked store,
Modified: llvm/trunk/test/CodeGen/X86/masked_load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_load.ll?rev=352433&r1=352432&r2=352433&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_load.ll Mon Jan 28 13:38:47 2019
@@ -1388,7 +1388,7 @@ define <2 x i32> @load_v2i32_v2i32(<2 x
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_v2i32_v2i32:
@@ -1400,7 +1400,7 @@ define <2 x i32> @load_v2i32_v2i32(<2 x
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_v2i32_v2i32:
@@ -1412,7 +1412,7 @@ define <2 x i32> @load_v2i32_v2i32(<2 x
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
-; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1423,7 +1423,7 @@ define <2 x i32> @load_v2i32_v2i32(<2 x
; AVX512VLBW-NEXT: vptestnmq %xmm0, %xmm0, %k1
; AVX512VLBW-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; AVX512VLBW-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1}
-; AVX512VLBW-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VLBW-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
More information about the llvm-commits
mailing list