[llvm] r260033 - [X86][AVX512] add intrinsics of Scalar FP to integer conversion with rounding mode

Asaf Badouh via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 7 06:59:14 PST 2016


Author: abadouh
Date: Sun Feb  7 08:59:13 2016
New Revision: 260033

URL: http://llvm.org/viewvc/llvm-project?rev=260033&view=rev
Log:
[X86][AVX512] add intrinsics of Scalar FP to integer conversion with rounding mode

Differential Revision: http://reviews.llvm.org/D16629

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Feb  7 08:59:13 2016
@@ -5098,10 +5098,6 @@ let TargetPrefix = "x86" in {  // All in
 
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
@@ -5116,11 +5112,6 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
@@ -5135,7 +5126,22 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
+  def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb  7 08:59:13 2016
@@ -21148,6 +21148,8 @@ const char *X86TargetLowering::getTarget
   case X86ISD::VFPCLASS:           return "X86ISD::VFPCLASS";
   case X86ISD::VFPCLASSS:          return "X86ISD::VFPCLASSS";
   case X86ISD::MULTISHIFT:         return "X86ISD::MULTISHIFT";
+  case X86ISD::SCALAR_FP_TO_SINT_RND: return "X86ISD::SCALAR_FP_TO_SINT_RND";
+  case X86ISD::SCALAR_FP_TO_UINT_RND: return "X86ISD::SCALAR_FP_TO_UINT_RND";
   }
   return nullptr;
 }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Feb  7 08:59:13 2016
@@ -470,6 +470,8 @@ namespace llvm {
 
       // Vector float/double to signed/unsigned integer.
       FP_TO_SINT_RND, FP_TO_UINT_RND,
+      // Scalar float/double to signed/unsigned integer.
+      SCALAR_FP_TO_SINT_RND, SCALAR_FP_TO_UINT_RND,
       // Save xmm argument registers to the stack, according to %al. An operator
       // is needed so that this can be expanded with control flow.
       VASTART_SAVE_XMM_REGS,

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Feb  7 08:59:13 2016
@@ -4945,54 +4945,66 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
 //===----------------------------------------------------------------------===//
 // AVX-512  Scalar convert from float/double to integer
 //===----------------------------------------------------------------------===//
-multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC,
-                                  RegisterClass DstRC, Intrinsic Int,
-                           Operand memop, ComplexPattern mem_cpat, string asm> {
+multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
+                                  X86VectorVTInfo DstVT, SDNode OpNode, string asm> {
   let hasSideEffects = 0, Predicates = [HasAVX512] in {
-    def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+    def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-                [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
-    def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
-                !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>,
+                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
+                EVEX, VEX_LIG;
+    def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
+                !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
+                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>, 
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC;
     let mayLoad = 1 in
-    def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
-                !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
-  } // hasSideEffects = 0, Predicates = [HasAVX512]
+    def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+                [(set DstVT.RC:$dst, (OpNode 
+                      (SrcVT.VT (scalar_to_vector (SrcVT.ScalarLdFrag addr:$src))),
+                      (i32 FROUND_CURRENT)))]>, 
+                EVEX, VEX_LIG;
+  } // hasSideEffects = 0, Predicates = [HasAVX512] 
 }
 
 // Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
-                                   ssmem, sse_load_f32, "cvtss2si">,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, 
+                                   X86cvtss2si, "cvtss2si">,
                                    XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
-                                  int_x86_sse_cvtss2si64,
-                                   ssmem, sse_load_f32, "cvtss2si">,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, 
+                                   X86cvtss2si, "cvtss2si">,
                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
-                                  int_x86_avx512_cvtss2usi,
-                                   ssmem, sse_load_f32, "cvtss2usi">,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, 
+                                   X86cvtss2usi, "cvtss2usi">,
                                    XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
-                                   int_x86_avx512_cvtss2usi64, ssmem,
-                                   sse_load_f32, "cvtss2usi">, XS, VEX_W,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, 
+                                   X86cvtss2usi, "cvtss2usi">, XS, VEX_W,
                                    EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
-                                   sdmem, sse_load_f64, "cvtsd2si">,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, 
+                                   X86cvtsd2si, "cvtsd2si">,
                                    XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
-                                   int_x86_sse2_cvtsd2si64,
-                                   sdmem, sse_load_f64, "cvtsd2si">,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
+                                   X86cvtsd2si, "cvtsd2si">,
                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, VR128X, GR32,
-                                   int_x86_avx512_cvtsd2usi,
-                                   sdmem, sse_load_f64, "cvtsd2usi">,
+defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
+                                   X86cvtsd2usi, "cvtsd2usi">,
                                    XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
-                                   int_x86_avx512_cvtsd2usi64, sdmem,
-                                   sse_load_f64, "cvtsd2usi">, XD, VEX_W,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
+                                   X86cvtsd2usi, "cvtsd2usi">, XD, VEX_W,
                                    EVEX_CD8<64, CD8VT1>;
 
+// The SSE version of these instructions are disabled for AVX512.
+// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
+let Predicates = [HasAVX512] in {
+  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
+            (VCVTSS2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
+            (VCVTSS2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
+            (VCVTSD2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
+            (VCVTSD2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+} // HasAVX512
+
 let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
   defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
             int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Feb  7 08:59:13 2016
@@ -542,6 +542,12 @@ def X86cvttss2IntRnd      : SDNode<"X86I
 def X86cvttss2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTSFloatToIntRnd>;
 def X86cvttsd2IntRnd      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTSDoubleToIntRnd>;
 def X86cvttsd2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTSDoubleToIntRnd>;
+
+def  X86cvtsd2si  : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSDoubleToIntRnd>;
+def  X86cvtsd2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSDoubleToIntRnd>;
+def  X86cvtss2si  : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSFloatToIntRnd>;
+def  X86cvtss2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSFloatToIntRnd>;
+
 // Vector with rounding mode
 
 // cvtt fp-to-int staff

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Feb  7 08:59:13 2016
@@ -2135,6 +2135,22 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
   X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
   X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+  X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_FP_TO_UINT_RND, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_pd,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_pd_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_ps,        INTR_TYPE_3OP, X86ISD::FMADD, 0),

Modified: llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp Sun Feb  7 08:59:13 2016
@@ -2275,10 +2275,10 @@ struct MemorySanitizerVisitor : public I
     case llvm::Intrinsic::bswap:
       handleBswap(I);
       break;
-    case llvm::Intrinsic::x86_avx512_cvtsd2usi64:
-    case llvm::Intrinsic::x86_avx512_cvtsd2usi:
-    case llvm::Intrinsic::x86_avx512_cvtss2usi64:
-    case llvm::Intrinsic::x86_avx512_cvtss2usi:
+    case llvm::Intrinsic::x86_avx512_vcvtsd2usi64:
+    case llvm::Intrinsic::x86_avx512_vcvtsd2usi32:
+    case llvm::Intrinsic::x86_avx512_vcvtss2usi64:
+    case llvm::Intrinsic::x86_avx512_vcvtss2usi32:
     case llvm::Intrinsic::x86_avx512_cvttss2usi64:
     case llvm::Intrinsic::x86_avx512_cvttss2usi:
     case llvm::Intrinsic::x86_avx512_cvttsd2usi64:

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=260033&r1=260032&r2=260033&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Feb  7 08:59:13 2016
@@ -424,12 +424,154 @@ declare i64 @llvm.x86.avx512.cvttss2usi6
 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vcvtsd2usi %xmm0, %rax
+; CHECK-NEXT:    vcvtsd2usi %xmm0, %rcx
+; CHECK-NEXT:    vcvtsd2usi      {rz-sae}, %xmm0, %rax
+; CHECK-NEXT:    vcvtsd2usi      {rd-sae}, %xmm0, %rdx
+; CHECK-NEXT:    addq    %rcx, %rax
+; CHECK-NEXT:    addq    %rdx, %rax
 ; CHECK-NEXT:    retq
-  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
-  ret i64 %res
+
+  %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
+  %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
+  %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
+  %res3 = add i64 %res, %res1
+  %res4 = add i64 %res3, %res2
+  ret i64 %res4
+}
+declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsd2si %xmm0, %rcx
+; CHECK-NEXT:    vcvtsd2si      {rz-sae}, %xmm0, %rax
+; CHECK-NEXT:    vcvtsd2si      {rd-sae}, %xmm0, %rdx
+; CHECK-NEXT:    addq    %rcx, %rax
+; CHECK-NEXT:    addq    %rdx, %rax
+; CHECK-NEXT:    retq
+
+  %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
+  %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
+  %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
+  %res3 = add i64 %res, %res1
+  %res4 = add i64 %res3, %res2
+  ret i64 %res4
+}
+declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtss2usi %xmm0, %rcx
+; CHECK-NEXT:    vcvtss2usi      {rz-sae}, %xmm0, %rax
+; CHECK-NEXT:    vcvtss2usi      {rd-sae}, %xmm0, %rdx
+; CHECK-NEXT:    addq    %rcx, %rax
+; CHECK-NEXT:    addq    %rdx, %rax
+; CHECK-NEXT:    retq
+
+  %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
+  %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
+  %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
+  %res3 = add i64 %res, %res1
+  %res4 = add i64 %res3, %res2
+  ret i64 %res4
+}
+declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtss2si64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtss2si %xmm0, %rcx
+; CHECK-NEXT:    vcvtss2si      {rz-sae}, %xmm0, %rax
+; CHECK-NEXT:    vcvtss2si      {rd-sae}, %xmm0, %rdx
+; CHECK-NEXT:    addq    %rcx, %rax
+; CHECK-NEXT:    addq    %rdx, %rax
+; CHECK-NEXT:    retq
+
+  %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
+  %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
+  %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
+  %res3 = add i64 %res, %res1
+  %res4 = add i64 %res3, %res2
+  ret i64 %res4
+}
+declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsd2usi %xmm0, %ecx
+; CHECK-NEXT:    vcvtsd2usi      {rz-sae}, %xmm0, %eax
+; CHECK-NEXT:    vcvtsd2usi      {rd-sae}, %xmm0, %edx
+; CHECK-NEXT:    addl    %ecx, %eax
+; CHECK-NEXT:    addl    %edx, %eax
+; CHECK-NEXT:    retq
+
+  %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
+  %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
+  %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
+  %res3 = add i32 %res, %res1
+  %res4 = add i32 %res3, %res2
+  ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsd2si %xmm0, %ecx
+; CHECK-NEXT:    vcvtsd2si      {rz-sae}, %xmm0, %eax
+; CHECK-NEXT:    vcvtsd2si      {rd-sae}, %xmm0, %edx
+; CHECK-NEXT:    addl    %ecx, %eax
+; CHECK-NEXT:    addl    %edx, %eax
+; CHECK-NEXT:    retq
+
+  %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
+  %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
+  %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
+  %res3 = add i32 %res, %res1
+  %res4 = add i32 %res3, %res2
+  ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtss2usi %xmm0, %ecx
+; CHECK-NEXT:    vcvtss2usi      {rz-sae}, %xmm0, %eax
+; CHECK-NEXT:    vcvtss2usi      {rd-sae}, %xmm0, %edx
+; CHECK-NEXT:    addl    %ecx, %eax
+; CHECK-NEXT:    addl    %edx, %eax
+; CHECK-NEXT:    retq
+
+  %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
+  %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
+  %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
+  %res3 = add i32 %res, %res1
+  %res4 = add i32 %res3, %res2
+  ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvtss2si32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtss2si %xmm0, %ecx
+; CHECK-NEXT:    vcvtss2si      {rz-sae}, %xmm0, %eax
+; CHECK-NEXT:    vcvtss2si      {rd-sae}, %xmm0, %edx
+; CHECK-NEXT:    addl    %ecx, %eax
+; CHECK-NEXT:    addl    %edx, %eax
+; CHECK-NEXT:    retq
+
+  %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
+  %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
+  %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
+  %res3 = add i32 %res, %res1
+  %res4 = add i32 %res3, %res2
+  ret i32 %res4
 }
-declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
 
 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
 ; CHECK-LABEL: test_x86_vcvtph2ps_512:




More information about the llvm-commits mailing list