[llvm-branch-commits] [llvm-branch] r339106 - Merging r338915:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 7 00:40:45 PDT 2018
Author: hans
Date: Tue Aug 7 00:40:45 2018
New Revision: 339106
URL: http://llvm.org/viewvc/llvm-project?rev=339106&view=rev
Log:
Merging r338915:
------------------------------------------------------------------------
r338915 | ctopper | 2018-08-03 22:14:18 +0200 (Fri, 03 Aug 2018) | 5 lines
[SelectionDAG] Teach LegalizeVectorTypes to widen the mask input to a masked store.
The mask operand is visited before the data operand so we need to be able to widen it.
Fixes PR38436.
------------------------------------------------------------------------
Modified:
llvm/branches/release_70/ (props changed)
llvm/branches/release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/branches/release_70/test/CodeGen/X86/masked_memop.ll
Propchange: llvm/branches/release_70/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Aug 7 00:40:45 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,338552,338554,338569,338599,338610,338658,338665,338682,338703,338709,338751,338762,338817,338968
+/llvm/trunk:155241,338552,338554,338569,338599,338610,338658,338665,338682,338703,338709,338751,338762,338817,338915,338968
Modified: llvm/branches/release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=339106&r1=339105&r2=339106&view=diff
==============================================================================
--- llvm/branches/release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/branches/release_70/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Tue Aug 7 00:40:45 2018
@@ -3641,26 +3641,43 @@ SDValue DAGTypeLegalizer::WidenVecOp_STO
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert(OpNo == 3 && "Can widen only data operand of mstore");
+ assert((OpNo == 2 || OpNo == 3) &&
+ "Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
- // Widen the value
- SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
- // The mask should be widened as well.
- EVT WideVT = WideVal.getValueType();
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(),
- WideVT.getVectorNumElements());
- Mask = ModifyToType(Mask, WideMaskVT, true);
+ if (OpNo == 3) {
+ // Widen the value
+ StVal = GetWidenedVector(StVal);
+
+ // The mask should be widened as well.
+ EVT WideVT = StVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else {
+ EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ EVT ValueVT = StVal.getValueType();
+ if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
+ StVal = GetWidenedVector(StVal);
+ else {
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
+ }
+ }
assert(Mask.getValueType().getVectorNumElements() ==
- WideVal.getValueType().getVectorNumElements() &&
+ StVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
- return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false, MST->isCompressingStore());
}
Modified: llvm/branches/release_70/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/test/CodeGen/X86/masked_memop.ll?rev=339106&r1=339105&r2=339106&view=diff
==============================================================================
--- llvm/branches/release_70/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/branches/release_70/test/CodeGen/X86/masked_memop.ll Tue Aug 7 00:40:45 2018
@@ -1310,6 +1310,65 @@ define void @trunc_mask(<4 x float> %x,
ret void
}
+; This needs to be widened to v4i32.
+; This used to assert in type legalization. PR38436
+; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask.
+define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
+; AVX1-LABEL: widen_masked_store:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vmovd %edx, %xmm1
+; AVX1-NEXT: vmovd %esi, %xmm2
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX1-NEXT: vmovd %ecx, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: widen_masked_store:
+; AVX2: ## %bb.0:
+; AVX2-NEXT: vmovd %edx, %xmm1
+; AVX2-NEXT: vmovd %esi, %xmm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX2-NEXT: vmovd %ecx, %xmm2
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmaskmovd %xmm0, %xmm1, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: widen_masked_store:
+; AVX512F: ## %bb.0:
+; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512F-NEXT: kshiftlw $12, %k0, %k0
+; AVX512F-NEXT: kshiftrw $12, %k0, %k1
+; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; SKX-LABEL: widen_masked_store:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vmovdqa32 %xmm1, %xmm1 {%k1} {z}
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
+; SKX-NEXT: retq
+ call void @llvm.masked.store.v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>)
+
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
More information about the llvm-branch-commits
mailing list