[llvm] f37d9b4 - [X86][FP16] Replace vXi16 with vXf16 instead of v8f16
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 5 03:55:31 PST 2021
Author: Phoebe Wang
Date: 2021-12-05T19:19:11+08:00
New Revision: f37d9b41122292d6758eef489af677ea1afa9436
URL: https://github.com/llvm/llvm-project/commit/f37d9b41122292d6758eef489af677ea1afa9436
DIFF: https://github.com/llvm/llvm-project/commit/f37d9b41122292d6758eef489af677ea1afa9436.diff
LOG: [X86][FP16] Replace vXi16 with vXf16 instead of v8f16
Fixes PR52561.
Reviewed By: LuoYuanke
Differential Revision: https://reviews.llvm.org/D114304
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512fp16-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 62b2387396bed..f412292dfd071 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36249,9 +36249,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
(V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
}
@@ -36300,9 +36301,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 150b763a17cd6..ee3696525c445 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2025,3 +2025,39 @@ for.body.preheader: ; preds = %entry
for.end: ; preds = %for.body.preheader, %entry
ret void
}
+
+define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
+; X64-LABEL: pr52561:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
+; X64-NEXT: vpaddd %ymm4, %ymm2, %ymm2
+; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT: vpaddd %ymm4, %ymm3, %ymm2
+; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr52561:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-32, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1
+; X86-NEXT: vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
+; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm2
+; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm1
+; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %1 = add <16 x i32> %a, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+ %2 = add <16 x i32> %1, %b
+ %3 = and <16 x i32> %2, <i32 65535, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 65535>
+ ret <16 x i32> %3
+}
More information about the llvm-commits
mailing list