[llvm] r271510 - [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (llvm)

Wed Jul 6 00:16:01 PDT 2016

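Sorry this review comment is a little late... but doesn't LangRef say, for
fptosi, "If the value won’t fit in the integer type, the results are
undefined."? The CVTT* instructions return a defined result (the integer
indefinite value, 0x80000000) for out-of-range inputs, so upgrading the
intrinsics to fptosi doesn't look semantics-preserving.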

-Eli

On Thu, Jun 2, 2016 at 3:55 AM, Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Thu Jun  2 05:55:21 2016
> New Revision: 271510
>
> URL: http://llvm.org/viewvc/llvm-project?rev=271510&view=rev
> Log:
> [X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero)
> f32/f64 to i32 with generic IR (llvm)
>
> This patch removes the llvm intrinsics for the (V)CVTTPS2DQ and VCVTTPD2DQ
> truncating (round to zero) conversions and auto-upgrades calls to them to
> generic fptosi instructions (FP_TO_SINT) instead.
>
> Note: I looked at updating CVTTPD2DQ as well but this still requires a lot
> more work to correctly lower.
>
> Differential Revision: http://reviews.llvm.org/D20860
>
> Modified:
>     llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>     llvm/trunk/lib/IR/AutoUpgrade.cpp
>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>     llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
>     llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
>     llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
>     llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
>     llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
>     llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Jun  2 05:55:21 2016
> @@ -488,8 +488,6 @@ let TargetPrefix = "x86" in {  // All in
>                Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
>    def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
>                Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -  def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
> -              Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
>    def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
>                Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
>    def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
> @@ -1725,12 +1723,8 @@ let TargetPrefix = "x86" in {  // All in
>          Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
>    def int_x86_avx_cvt_ps2dq_256 :
> GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
>          Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
> -  def int_x86_avx_cvtt_pd2dq_256 :
> GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
> -        Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
>    def int_x86_avx_cvt_pd2dq_256 :
> GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
>          Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
> -  def int_x86_avx_cvtt_ps2dq_256 :
> GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
> -        Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
>  }
>
>  // Vector bit test
>
> Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
> +++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Jun  2 05:55:21 2016
> @@ -185,6 +185,8 @@ static bool UpgradeIntrinsicFunction1(Fu
>          Name == "x86.sse2.cvtps2pd" ||
>          Name == "x86.avx.cvtdq2.pd.256" ||
>          Name == "x86.avx.cvt.ps2.pd.256" ||
> +        Name == "x86.sse2.cvttps2dq" ||
> +        Name.startswith("x86.avx.cvtt.") ||
>          Name.startswith("x86.avx.vinsertf128.") ||
>          Name == "x86.avx2.vinserti128" ||
>          Name.startswith("x86.avx.vextractf128.") ||
> @@ -498,6 +500,12 @@ void llvm::UpgradeIntrinsicCall(CallInst
>          Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
>        else
>          Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
> +    } else if (Name == "llvm.x86.sse2.cvttps2dq" ||
> +               Name.startswith("llvm.x86.avx.cvtt.")) {
> +      // Truncation (round to zero) float/double to i32 vector conversion.
> +      Value *Src = CI->getArgOperand(0);
> +      VectorType *DstTy = cast<VectorType>(CI->getType());
> +      Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
>      } else if (Name.startswith("llvm.x86.avx.movnt.")) {
>        Module *M = F->getParent();
>        SmallVector<Metadata *, 1> Elts;
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jun  2 05:55:21 2016
> @@ -2013,35 +2013,24 @@ def CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (
>  // SSE2 packed instructions with XS prefix
>  def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins
> VR128:$src),
>                           "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                         [(set VR128:$dst,
> -                           (int_x86_sse2_cvttps2dq VR128:$src))],
> -                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
> +                         [], IIC_SSE_CVT_PS_RR>, VEX,
> Sched<[WriteCvtF2I]>;
>  def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins
> f128mem:$src),
>                           "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
> -                                            (loadv4f32 addr:$src)))],
> -                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
> +                         [], IIC_SSE_CVT_PS_RM>, VEX,
> Sched<[WriteCvtF2ILd]>;
>  def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins
> VR256:$src),
>                            "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                          [(set VR256:$dst,
> -                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
> -                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L,
> Sched<[WriteCvtF2I]>;
> +                          [], IIC_SSE_CVT_PS_RR>, VEX, VEX_L,
> Sched<[WriteCvtF2I]>;
>  def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins
> f256mem:$src),
>                            "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
> -                                             (loadv8f32 addr:$src)))],
> -                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
> +                          [], IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
>                            Sched<[WriteCvtF2ILd]>;
>
>  def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins
> VR128:$src),
>                         "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq
> VR128:$src))],
> -                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
> +                       [], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
>  def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins
> f128mem:$src),
>                         "cvttps2dq\t{$src, $dst|$dst, $src}",
> -                       [(set VR128:$dst,
> -                         (int_x86_sse2_cvttps2dq (memopv4f32
> addr:$src)))],
> -                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
> +                       [], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
>
>  let Predicates = [HasAVX] in {
>    def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
> @@ -2111,14 +2100,10 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem
>  // YMM only
>  def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins
> VR256:$src),
>                           "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
> -                         [(set VR128:$dst,
> -                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
> -                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L,
> Sched<[WriteCvtF2I]>;
> +                         [], IIC_SSE_CVT_PD_RR>, VEX, VEX_L,
> Sched<[WriteCvtF2I]>;
>  def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins
> f256mem:$src),
>                           "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
> -                         [(set VR128:$dst,
> -                          (int_x86_avx_cvtt_pd2dq_256 (loadv4f64
> addr:$src)))],
> -                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L,
> Sched<[WriteCvtF2ILd]>;
> +                         [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L,
> Sched<[WriteCvtF2ILd]>;
>  def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
>                  (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
>
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Thu Jun  2
> 05:55:21 2016
> @@ -675,11 +675,10 @@ define <2 x i64> @test_mm256_cvttpd_epi3
>  ; X64-NEXT:    vcvttpd2dqy %ymm0, %xmm0
>  ; X64-NEXT:    vzeroupper
>  ; X64-NEXT:    retq
> -  %cvt = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
> +  %cvt = fptosi <4 x double> %a0 to <4 x i32>
>    %res = bitcast <4 x i32> %cvt to <2 x i64>
>    ret <2 x i64> %res
>  }
> -declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind
> readnone
>
>  define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind {
>  ; X32-LABEL: test_mm256_cvttps_epi32:
> @@ -691,11 +690,10 @@ define <4 x i64> @test_mm256_cvttps_epi3
>  ; X64:       # BB#0:
>  ; X64-NEXT:    vcvttps2dq %ymm0, %ymm0
>  ; X64-NEXT:    retq
> -  %cvt = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
> +  %cvt = fptosi <8 x float> %a0 to <8 x i32>
>    %res = bitcast <8 x i32> %cvt to <4 x i64>
>    ret <4 x i64> %res
>  }
> -declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind
> readnone
>
>  define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double>
> %a1) nounwind {
>  ; X32-LABEL: test_mm256_div_pd:
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Thu Jun  2
> 05:55:21 2016
> @@ -357,12 +357,35 @@ define <4 x double> @test_x86_avx_cvt_ps
>  declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind
> readnone
>
>
> +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
> +; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
> +; CHECK:       ## BB#0:
> +; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
> +; CHECK-NEXT:    vzeroupper
> +; CHECK-NEXT:    retl
> +  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ;
> <<4 x i32>> [#uses=1]
> +  ret <4 x i32> %res
> +}
> +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind
> readnone
> +
> +
> +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
> +; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
> +; CHECK:       ## BB#0:
> +; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
> +; CHECK-NEXT:    retl
> +  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ;
> <<8 x i32>> [#uses=1]
> +  ret <8 x i32> %res
> +}
> +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind
> readnone
> +
> +
>  define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
>    ; add operation forces the execution domain.
>  ; CHECK-LABEL: test_x86_sse2_storeu_dq:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
> -; CHECK-NEXT:    vpaddb LCPI32_0, %xmm0, %xmm0
> +; CHECK-NEXT:    vpaddb LCPI34_0, %xmm0, %xmm0
>  ; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
>  ; CHECK-NEXT:    retl
>    %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8
> 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Thu Jun  2 05:55:21
> 2016
> @@ -3407,39 +3407,6 @@ define <8 x float> @test_x86_avx_cvtdq2_
>  declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind
> readnone
>
>
> -define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
> -; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
> -; AVX:       ## BB#0:
> -; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
> -; AVX-NEXT:    vzeroupper
> -; AVX-NEXT:    retl
> -;
> -; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
> -; AVX512VL:       ## BB#0:
> -; AVX512VL-NEXT:    vcvttpd2dqy %ymm0, %xmm0
> -; AVX512VL-NEXT:    retl
> -  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ;
> <<4 x i32>> [#uses=1]
> -  ret <4 x i32> %res
> -}
> -declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind
> readnone
> -
> -
> -define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
> -; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
> -; AVX:       ## BB#0:
> -; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
> -; AVX-NEXT:    retl
> -;
> -; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
> -; AVX512VL:       ## BB#0:
> -; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
> -; AVX512VL-NEXT:    retl
> -  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ;
> <<8 x i32>> [#uses=1]
> -  ret <8 x i32> %res
> -}
> -declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind
> readnone
> -
> -
>  define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float>
> %a1) {
>  ; AVX-LABEL: test_x86_avx_dp_ps_256:
>  ; AVX:       ## BB#0:
> @@ -4133,7 +4100,7 @@ define <4 x double> @test_x86_avx_vpermi
>  ;
>  ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
>  ; AVX512VL:       ## BB#0:
> -; AVX512VL-NEXT:    vpermilpd LCPI233_0, %ymm0, %ymm0
> +; AVX512VL-NEXT:    vpermilpd LCPI231_0, %ymm0, %ymm0
>  ; AVX512VL-NEXT:    retl
>    %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>
> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
>    ret <4 x double> %res
> @@ -4625,7 +4592,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
>  ; AVX-LABEL: movnt_dq:
>  ; AVX:       ## BB#0:
>  ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
> -; AVX-NEXT:    vpaddq LCPI260_0, %xmm0, %xmm0
> +; AVX-NEXT:    vpaddq LCPI258_0, %xmm0, %xmm0
>  ; AVX-NEXT:    vmovntdq %ymm0, (%eax)
>  ; AVX-NEXT:    vzeroupper
>  ; AVX-NEXT:    retl
> @@ -4633,7 +4600,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
>  ; AVX512VL-LABEL: movnt_dq:
>  ; AVX512VL:       ## BB#0:
>  ; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
> -; AVX512VL-NEXT:    vpaddq LCPI260_0, %xmm0, %xmm0
> +; AVX512VL-NEXT:    vpaddq LCPI258_0, %xmm0, %xmm0
>  ; AVX512VL-NEXT:    vmovntdq %ymm0, (%eax)
>  ; AVX512VL-NEXT:    retl
>    %a2 = add <2 x i64> %a1, <i64 1, i64 1>
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Thu Jun  2
> 05:55:21 2016
> @@ -1280,11 +1280,10 @@ define <2 x i64> @test_mm_cvttps_epi32(<
>  ; X64:       # BB#0:
>  ; X64-NEXT:    cvttps2dq %xmm0, %xmm0
>  ; X64-NEXT:    retq
> -  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
> +  %res = fptosi <4 x float> %a0 to <4 x i32>
>    %bc = bitcast <4 x i32> %res to <2 x i64>
>    ret <2 x i64> %bc
>  }
> -declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
>
>  define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
>  ; X32-LABEL: test_mm_cvttsd_si32:
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Thu Jun  2
> 05:55:21 2016
> @@ -84,6 +84,17 @@ define <2 x double> @test_x86_sse2_cvtps
>  declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind
> readnone
>
>
> +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
> +; CHECK-LABEL: test_x86_sse2_cvttps2dq:
> +; CHECK:       ## BB#0:
> +; CHECK-NEXT:    cvttps2dq %xmm0, %xmm0
> +; CHECK-NEXT:    retl
> +  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x
> i32>> [#uses=1]
> +  ret <4 x i32> %res
> +}
> +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
> +
> +
>  define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
>  ; CHECK-LABEL: test_x86_sse2_storel_dq:
>  ; CHECK:       ## BB#0:
> @@ -101,7 +112,7 @@ define void @test_x86_sse2_storeu_dq(i8*
>  ; CHECK-LABEL: test_x86_sse2_storeu_dq:
>  ; CHECK:       ## BB#0:
>  ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
> -; CHECK-NEXT:    paddb LCPI7_0, %xmm0
> +; CHECK-NEXT:    paddb LCPI8_0, %xmm0
>  ; CHECK-NEXT:    movdqu %xmm0, (%eax)
>  ; CHECK-NEXT:    retl
>    %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8
> 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=271510&r1=271509&r2=271510&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Thu Jun  2 05:55:21
> 2016
> @@ -322,22 +322,6 @@ define <4 x i32> @test_x86_sse2_cvttpd2d
>  declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
>
>
> -define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
> -; SSE-LABEL: test_x86_sse2_cvttps2dq:
> -; SSE:       ## BB#0:
> -; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
> -; SSE-NEXT:    retl
> -;
> -; KNL-LABEL: test_x86_sse2_cvttps2dq:
> -; KNL:       ## BB#0:
> -; KNL-NEXT:    vcvttps2dq %xmm0, %xmm0
> -; KNL-NEXT:    retl
> -  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x
> i32>> [#uses=1]
> -  ret <4 x i32> %res
> -}
> -declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
> -
> -
>  define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
>  ; SSE-LABEL: test_x86_sse2_cvttsd2si:
>  ; SSE:       ## BB#0:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160706/b690d603/attachment.html>