[llvm] r270678 - [X86][SSE] Replace (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) lossless conversion intrinsics with generic IR
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 01:59:21 PDT 2016
Author: rksimon
Date: Wed May 25 03:59:18 2016
New Revision: 270678
URL: http://llvm.org/viewvc/llvm-project?rev=270678&view=rev
Log:
[X86][SSE] Replace (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) lossless conversion intrinsics with generic IR
Follow-up to the D20528 clang patch: this removes the (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) LLVM intrinsics and auto-upgrades calls to them to sitofp/fpext instead.
Differential Revision: http://reviews.llvm.org/D20568
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed May 25 03:59:18 2016
@@ -485,8 +485,6 @@ let TargetPrefix = "x86" in { // All in
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
@@ -499,8 +497,6 @@ let TargetPrefix = "x86" in { // All in
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@@ -1762,16 +1758,12 @@ let TargetPrefix = "x86" in { // All in
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Wed May 25 03:59:18 2016
@@ -178,6 +178,10 @@ static bool UpgradeIntrinsicFunction1(Fu
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
Name.startswith("x86.sse41.pmovsx") ||
+ Name == "x86.sse2.cvtdq2pd" ||
+ Name == "x86.sse2.cvtps2pd" ||
+ Name == "x86.avx.cvtdq2.pd.256" ||
+ Name == "x86.avx.cvt.ps2.pd.256" ||
Name == "x86.avx.vinsertf128.pd.256" ||
Name == "x86.avx.vinsertf128.ps.256" ||
Name == "x86.avx.vinsertf128.si.256" ||
@@ -397,6 +401,29 @@ void llvm::UpgradeIntrinsicCall(CallInst
"pcmpgt");
// need to sign extend since icmp returns vector of i1
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
+ Name == "llvm.x86.sse2.cvtps2pd" ||
+ Name == "llvm.x86.avx.cvtdq2.pd.256" ||
+ Name == "llvm.x86.avx.cvt.ps2.pd.256") {
+ // Lossless i32/float to double conversion.
+ // Extract the bottom elements if necessary and convert to double vector.
+ Value *Src = CI->getArgOperand(0);
+ VectorType *SrcTy = cast<VectorType>(Src->getType());
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ Rep = CI->getArgOperand(0);
+
+ unsigned NumDstElts = DstTy->getNumElements();
+ if (NumDstElts < SrcTy->getNumElements()) {
+ assert(NumDstElts == 2 && "Unexpected vector size");
+ const int ShuffleMask[2] = { 0, 1 };
+ Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), ShuffleMask);
+ }
+
+ bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
+ if (Int2Double)
+ Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else
+ Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
} else if (Name == "llvm.x86.avx.movnt.dq.256" ||
Name == "llvm.x86.avx.movnt.ps.256" ||
Name == "llvm.x86.avx.movnt.pd.256") {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 25 03:59:18 2016
@@ -2163,30 +2163,24 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ [], IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ [], IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
@@ -2197,24 +2191,17 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (out
let Predicates = [HasAVX] in {
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, Sched<[WriteCvtI2FLd]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
- Sched<[WriteCvtI2F]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256
- (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
- Sched<[WriteCvtI2FLd]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
- Sched<[WriteCvtI2F]>;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX, VEX_L, Sched<[WriteCvtI2F]>;
}
let hasSideEffects = 0, mayLoad = 1 in
@@ -2222,8 +2209,7 @@ def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem,
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX register conversion intrinsics
Modified: llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp Wed May 25 03:59:18 2016
@@ -2347,8 +2347,6 @@ struct MemorySanitizerVisitor : public I
case llvm::Intrinsic::x86_sse_cvttss2si:
handleVectorConvertIntrinsic(I, 1);
break;
- case llvm::Intrinsic::x86_sse2_cvtdq2pd:
- case llvm::Intrinsic::x86_sse2_cvtps2pd:
case llvm::Intrinsic::x86_sse_cvtps2pi:
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Wed May 25 03:59:18 2016
@@ -245,3 +245,83 @@ define <2 x i64> @test_x86_sse41_pmovsxw
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; AVX-LABEL: test_x86_sse2_cvtps2pd:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
+; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX: ## BB#0:
+; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Wed May 25 03:59:18 2016
@@ -274,22 +274,6 @@ define i32 @test_x86_sse2_comineq_sd(<2
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512VL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2ps:
; AVX: ## BB#0:
@@ -354,22 +338,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; AVX-LABEL: test_x86_sse2_cvtps2pd:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
-; AVX512VL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvtsd2si:
; AVX: ## BB#0:
@@ -1258,14 +1226,14 @@ define void @test_x86_sse2_storeu_dq(i8*
; AVX-LABEL: test_x86_sse2_storeu_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
; AVX-NEXT: vmovdqu %xmm0, (%eax)
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -3569,22 +3537,6 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
-define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
-; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX512VL-NEXT: retl
- %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
-
-
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
; AVX: ## BB#0:
@@ -3601,22 +3553,6 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
-define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
-; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX: ## BB#0:
-; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX-NEXT: retl
-;
-; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX512VL-NEXT: retl
- %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
-
-
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX: ## BB#0:
@@ -4190,7 +4126,7 @@ define void @test_x86_avx_storeu_dq_256(
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI235_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -4431,7 +4367,7 @@ define <4 x double> @test_x86_avx_vpermi
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpermilpd LCPI249_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -4923,7 +4859,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@@ -4931,7 +4867,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Wed May 25 03:59:18 2016
@@ -44,6 +44,46 @@ define <2 x i64> @test_x86_sse2_psrl_dq(
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtdq2pd:
+; SSE: ## BB#0:
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtdq2pd:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; KNL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse2_cvtps2pd:
+; SSE: ## BB#0:
+; SSE-NEXT: cvtps2pd %xmm0, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtps2pd:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
+; KNL-NEXT: retl
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: cvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=270678&r1=270677&r2=270678&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Wed May 25 03:59:18 2016
@@ -178,22 +178,6 @@ define i32 @test_x86_sse2_comineq_sd(<2
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtdq2pd:
-; SSE: ## BB#0:
-; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_cvtdq2pd:
-; KNL: ## BB#0:
-; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; KNL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
-
-
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE: ## BB#0:
@@ -258,22 +242,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse2_cvtps2pd:
-; SSE: ## BB#0:
-; SSE-NEXT: cvtps2pd %xmm0, %xmm0
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_cvtps2pd:
-; KNL: ## BB#0:
-; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
-; KNL-NEXT: retl
- %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
-
-
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtsd2si:
; SSE: ## BB#0:
@@ -1162,14 +1130,14 @@ define void @test_x86_sse2_storeu_dq(i8*
; SSE-LABEL: test_x86_sse2_storeu_dq:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: paddb LCPI70_0, %xmm0
+; SSE-NEXT: paddb LCPI68_0, %xmm0
; SSE-NEXT: movdqu %xmm0, (%eax)
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_storeu_dq:
; KNL: ## BB#0:
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL-NEXT: vpaddb LCPI70_0, %xmm0, %xmm0
+; KNL-NEXT: vpaddb LCPI68_0, %xmm0, %xmm0
; KNL-NEXT: vmovdqu %xmm0, (%eax)
; KNL-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
More information about the llvm-commits
mailing list