[llvm] 35f7cbf - [X86] Don't crash on CVTPS2PH with wide vector inputs.

Tue Oct 27 06:42:24 PDT 2020

Author: Benjamin Kramer
Date: 2020-10-27T14:42:02+01:00
New Revision: 35f7cbf9dfeced33561c811fb1a3b750a2e35640

URL: https://github.com/llvm/llvm-project/commit/35f7cbf9dfeced33561c811fb1a3b750a2e35640
DIFF: https://github.com/llvm/llvm-project/commit/35f7cbf9dfeced33561c811fb1a3b750a2e35640.diff

LOG: [X86] Don't crash on CVTPS2PH with wide vector inputs.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-half-conversions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b508c2b36756..fb9fc8444213 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29878,6 +29878,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Chain);
     return;
   }
+  case X86ISD::CVTPS2PH:
+    Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG));
+    return;
   case ISD::CTPOP: {
     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
     // Use a v2i64 if possible.

diff  --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index a4be7337f145..abaff7176ed0 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -1422,3 +1422,33 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
   store <8 x i16> %2, <8 x i16>* %a1
   ret void
 }
+
+define void @store_cvt_32f32_to_32f16(<32 x float> %a0, <32 x half>* %a1) nounwind {
+; AVX1-LABEL: store_cvt_32f32_to_32f16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcvtps2ph $4, %ymm3, 48(%rdi)
+; AVX1-NEXT:    vcvtps2ph $4, %ymm2, 32(%rdi)
+; AVX1-NEXT:    vcvtps2ph $4, %ymm1, 16(%rdi)
+; AVX1-NEXT:    vcvtps2ph $4, %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: store_cvt_32f32_to_32f16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcvtps2ph $4, %ymm3, 48(%rdi)
+; AVX2-NEXT:    vcvtps2ph $4, %ymm2, 32(%rdi)
+; AVX2-NEXT:    vcvtps2ph $4, %ymm1, 16(%rdi)
+; AVX2-NEXT:    vcvtps2ph $4, %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: store_cvt_32f32_to_32f16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcvtps2ph $4, %zmm1, 32(%rdi)
+; AVX512-NEXT:    vcvtps2ph $4, %zmm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %1 = fptrunc <32 x float> %a0 to <32 x half>
+  store <32 x half> %1, <32 x half>* %a1
+  ret void
+}