[llvm] 74b979a - [X86][FP16] Avoid generating VZEXT_MOVL with i16
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 11 17:32:36 PST 2021
Author: Phoebe Wang
Date: 2021-11-12T09:32:29+08:00
New Revision: 74b979abcd0fd711cb1038b666eabf17f2274fe3
URL: https://github.com/llvm/llvm-project/commit/74b979abcd0fd711cb1038b666eabf17f2274fe3
DIFF: https://github.com/llvm/llvm-project/commit/74b979abcd0fd711cb1038b666eabf17f2274fe3.diff
LOG: [X86][FP16] Avoid generating VZEXT_MOVL with i16
This fixes a crash caused by the lack of VZEXT_MOVL support for i16.
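For context, a minimal sketch of the type selection this change installs at both VZEXT_MOVL match sites (the standalone helper below is illustrative, not code from the tree): since the i16 form of the node has no lowering, 16-bit elements are routed to the f16 domain, where VMOVSH handles the zero-extending move.

#include "llvm/Support/MachineValueType.h"
using namespace llvm;

// Illustrative restatement of the new SrcVT/DstVT selection (not a verbatim
// excerpt): an i16 VZEXT_MOVL cannot be lowered, so 16-bit elements use v8f16.
static MVT selectVZextMovlVT(unsigned MaskEltSize, bool HasSSE2, MVT MaskVT) {
  if (MaskEltSize == 16)
    return MVT::v8f16; // FP16 path: lowered as VMOVSH on f16 lanes.
  if (!HasSSE2)
    return MVT::v4f32; // SSE1 only provides MOVSS, a 32-bit float move.
  return MaskVT;       // Otherwise the matched mask type works directly.
}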
Reviewed By: LuoYuanke, RKSimon
Differential Revision: https://reviews.llvm.org/D113661
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512fp16-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b796f12baa34..60daca24e728 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36100,8 +36100,9 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
         (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
       Shuffle = X86ISD::VZEXT_MOVL;
-      SrcVT = DstVT =
-          !Subtarget.hasSSE2() && MaskEltSize == 32 ? MVT::v4f32 : MaskVT;
+      SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+                      : !Subtarget.hasSSE2() ? MVT::v4f32
+                                             : MaskVT;
       return true;
     }
   }
@@ -36145,11 +36146,14 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
   }
   // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
-  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
+       (MaskEltSize == 16 && Subtarget.hasFP16())) &&
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
-    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+    SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+                    : !Subtarget.hasSSE2() ? MVT::v4f32
+                                           : MaskVT;
     return true;
   }
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 0b384a4d10c3..7f05b0220981 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1926,3 +1926,32 @@ define void @load_store_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x half>* %z) {
store <4 x half> %c, <4 x half>* %z
ret void
}
+
+define <8 x half> @test21(half %a, half %b, half %c) nounwind {
+; X64-LABEL: test21:
+; X64: # %bb.0:
+; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; X64-NEXT: vmovsh %xmm2, %xmm3, %xmm2
+; X64-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpbroadcastw %xmm1, %xmm1
+; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; X64-NEXT: retq
+;
+; X86-LABEL: test21:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm1
+; X86-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm1
+; X86-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpbroadcastw %xmm1, %xmm1
+; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; X86-NEXT: retl
+ %1 = insertelement <8 x half> <half poison, half poison, half poison, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000>, half %a, i32 0
+ %2 = insertelement <8 x half> %1, half %b, i32 1
+ %3 = insertelement <8 x half> %2, half %c, i32 2
+ ret <8 x half> %3
+}
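For readers skimming the matcher changes above: X86ISD::VZEXT_MOVL models a move that keeps lane 0 of the source and zeroes every remaining lane (MOVSS/MOVSD, and now VMOVSH for f16). Below is a self-contained sketch of the mask shape both hunks accept; the helper is illustrative, using LLVM's sentinel encoding of -1 for an undef lane (SM_SentinelUndef) and -2 for a known-zero lane (SM_SentinelZero).

#include <vector>

// Stand-in for the in-tree isUndefOrEqual/isUndefOrZeroInRange checks:
// lane 0 must be source element 0 (or undef), and every other lane must
// be undef (-1) or known zero (-2) for the shuffle to fold to VZEXT_MOVL.
static bool looksLikeVZextMovl(const std::vector<int> &Mask) {
  if (Mask.empty() || (Mask[0] != 0 && Mask[0] != -1))
    return false;
  for (size_t I = 1, E = Mask.size(); I != E; ++I)
    if (Mask[I] != -1 && Mask[I] != -2)
      return false;
  return true;
}

In the new test's X64 output, the first two instructions show exactly this: vmovsh merges the low half of %xmm2 into a zeroed %xmm3, which is the lowered VZEXT_MOVL for the f16 value %c.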