r272840 - [x86] translate SSE packed FP comparison builtins to IR
Sanjay Patel via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 15 14:20:05 PDT 2016
Author: spatel
Date: Wed Jun 15 16:20:04 2016
New Revision: 272840
URL: http://llvm.org/viewvc/llvm-project?rev=272840&view=rev
Log:
[x86] translate SSE packed FP comparison builtins to IR
As noted in the code comment, a potential follow-on would be to remove
the builtins themselves. Other than ord/unord, this already works as
expected. E.g.:
typedef float v4sf __attribute__((__vector_size__(16)));
v4sf fcmpgt(v4sf a, v4sf b) { return a > b; }
Differential Revision: http://reviews.llvm.org/D21268
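For illustration only (not part of this commit), the ordered comparison can
also be written with vector extensions using the identity
ord(a, b) == (a == a) & (b == b); the helper name and the v4si type here are
hypothetical:

typedef float v4sf __attribute__((__vector_size__(16)));
typedef int   v4si __attribute__((__vector_size__(16)));

/* a == a is false exactly in the lanes where a is NaN, so ANDing the two
   self-compares gives the ordered (neither-is-NaN) mask per lane. */
v4si fcmpord(v4sf a, v4sf b) { return (a == a) & (b == b); }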
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx2-builtins.c
cfe/trunk/test/CodeGen/sse-builtins.c
cfe/trunk/test/CodeGen/sse2-builtins.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=272840&r1=272839&r2=272840&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Jun 15 16:20:04 2016
@@ -6419,6 +6419,36 @@ Value *CodeGenFunction::EmitX86BuiltinEx
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
}
+ // These exist so that the builtin that takes an immediate can be bounds
+ // checked by clang to avoid passing bad immediates to the backend. Since
+ // AVX has a larger immediate than SSE we would need separate builtins to
+ // do the different bounds checking. Rather than create a clang-specific
+ // SSE-only builtin, this implements eight separate builtins to match the
+ // gcc implementation.
+ auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
+ Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops);
+ };
+
+ // For the vector forms of FP comparisons, translate the builtins directly to
+ // IR.
+ // TODO: The builtins could be removed if the SSE header files used vector
+ // extension comparisons directly (vector ordered/unordered may need
+ // additional support via __builtin_isnan()).
+ llvm::VectorType *V2F64 =
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2);
+ llvm::VectorType *V4F32 =
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4);
+
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
+ llvm::VectorType *FPVecTy) {
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+ Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+ return Builder.CreateBitCast(Sext, FPVecTy);
+ };
+
switch (BuiltinID) {
default: return nullptr;
case X86::BI__builtin_cpu_supports: {
@@ -6857,154 +6887,74 @@ Value *CodeGenFunction::EmitX86BuiltinEx
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
- // SSE comparison intrisics
+
+ // SSE packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ, V4F32);
case X86::BI__builtin_ia32_cmpltps:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT, V4F32);
case X86::BI__builtin_ia32_cmpleps:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE, V4F32);
case X86::BI__builtin_ia32_cmpunordps:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO, V4F32);
case X86::BI__builtin_ia32_cmpneqps:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE, V4F32);
case X86::BI__builtin_ia32_cmpnltps:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE, V4F32);
case X86::BI__builtin_ia32_cmpnleps:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT, V4F32);
case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD, V4F32);
case X86::BI__builtin_ia32_cmpeqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ, V2F64);
case X86::BI__builtin_ia32_cmpltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT, V2F64);
case X86::BI__builtin_ia32_cmplepd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE, V2F64);
case X86::BI__builtin_ia32_cmpunordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO, V2F64);
case X86::BI__builtin_ia32_cmpneqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE, V2F64);
case X86::BI__builtin_ia32_cmpnltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE, V2F64);
case X86::BI__builtin_ia32_cmpnlepd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
case X86::BI__builtin_ia32_cmpordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+
+ // SSE scalar comparison intrinsics
+ case X86::BI__builtin_ia32_cmpeqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
+ case X86::BI__builtin_ia32_cmpltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
+ case X86::BI__builtin_ia32_cmpless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
+ case X86::BI__builtin_ia32_cmpunordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
+ case X86::BI__builtin_ia32_cmpneqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
+ case X86::BI__builtin_ia32_cmpnltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
+ case X86::BI__builtin_ia32_cmpnless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
+ case X86::BI__builtin_ia32_cmpordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
case X86::BI__builtin_ia32_cmpeqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
case X86::BI__builtin_ia32_cmpltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
case X86::BI__builtin_ia32_cmplesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
case X86::BI__builtin_ia32_cmpunordsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
case X86::BI__builtin_ia32_cmpneqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
case X86::BI__builtin_ia32_cmpnltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
case X86::BI__builtin_ia32_cmpnlesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
case X86::BI__builtin_ia32_cmpordsd:
- // These exist so that the builtin that takes an immediate can be bounds
- // checked by clang to avoid passing bad immediates to the backend. Since
- // AVX has a larger immediate than SSE we would need separate builtins to
- // do the different bounds checking. Rather than create a clang specific
- // SSE only builtin, this implements eight separate builtins to match gcc
- // implementation.
-
- // Choose the immediate.
- unsigned Imm;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpeqsd:
- Imm = 0;
- break;
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpltsd:
- Imm = 1;
- break;
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmplesd:
- Imm = 2;
- break;
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpunordsd:
- Imm = 3;
- break;
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpneqsd:
- Imm = 4;
- break;
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnltsd:
- Imm = 5;
- break;
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpnlesd:
- Imm = 6;
- break;
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpordsd:
- Imm = 7;
- break;
- }
-
- // Choose the intrinsic ID.
- const char *name;
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpordps:
- name = "cmpps";
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- name = "cmpss";
- ID = Intrinsic::x86_sse_cmp_ss;
- break;
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
- name = "cmppd";
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- name = "cmpsd";
- ID = Intrinsic::x86_sse2_cmp_sd;
- break;
- }
-
- Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, name);
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
}
}
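As a minimal standalone sketch (assuming LLVM headers of this era, where
VectorType::get(Type*, unsigned) is the API; not part of the commit), this is
the fcmp + sext + bitcast sequence that getVectorFCmpIR emits:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  VectorType *V4F32 = VectorType::get(Type::getFloatTy(Ctx), 4);
  Function *F = Function::Create(
      FunctionType::get(V4F32, {V4F32, V4F32}, false),
      Function::ExternalLinkage, "cmpeqps", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  auto AI = F->arg_begin();
  Value *A = &*AI++;
  Value *Bv = &*AI;
  // Compare to <4 x i1>, sign-extend each lane to all-ones/all-zero i32,
  // then reinterpret the bits as the FP vector type the builtin returns.
  Value *Cmp = B.CreateFCmp(CmpInst::FCMP_OEQ, A, Bv);
  Value *Sext = B.CreateSExt(Cmp, VectorType::getInteger(V4F32));
  B.CreateRet(B.CreateBitCast(Sext, V4F32));
  M.print(outs(), nullptr);
  return 0;
}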
Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=272840&r1=272839&r2=272840&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Wed Jun 15 16:20:04 2016
@@ -473,7 +473,9 @@ __m256i test_mm256_mask_i32gather_epi64(
__m128d test_mm_i32gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
// CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i32gather_pd(b, c, 2);
}
@@ -499,7 +501,9 @@ __m256d test_mm256_mask_i32gather_pd(__m
__m128 test_mm_i32gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i32gather_ps(b, c, 2);
}
@@ -573,7 +577,9 @@ __m256i test_mm256_mask_i64gather_epi64(
__m128d test_mm_i64gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
// CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i64gather_pd(b, c, 2);
}
@@ -599,7 +605,9 @@ __m256d test_mm256_mask_i64gather_pd(__m
__m128 test_mm_i64gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i64gather_ps(b, c, 2);
}
@@ -612,7 +620,9 @@ __m128 test_mm_mask_i64gather_ps(__m128
__m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm256_i64gather_ps(b, c, 2);
}
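The gather tests change only as a side effect: the avx2intrin.h wrappers
(as assumed here; the header is not part of this diff) synthesize their
all-true mask by comparing zero with itself, and that comparison now lowers
through the new fcmp path. Roughly:

#include <emmintrin.h>

/* Hypothetical reduction of the header idiom: 0.0 == 0.0 holds in every
   lane, so the compare yields an all-ones mask, now emitted as
   fcmp oeq + sext + bitcast instead of a call to @llvm.x86.sse2.cmp.pd. */
static __m128d all_true_pd(void) {
  __m128d z = _mm_setzero_pd();
  return _mm_cmpeq_pd(z, z);
}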
Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=272840&r1=272839&r2=272840&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Wed Jun 15 16:20:04 2016
@@ -37,7 +37,10 @@ __m128 test_mm_andnot_ps(__m128 A, __m12
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpeq_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpeq_ps(__a, __b);
}
@@ -49,7 +52,10 @@ __m128 test_mm_cmpeq_ss(__m128 __a, __m1
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpge_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpge_ps(__a, __b);
}
@@ -62,7 +68,10 @@ __m128 test_mm_cmpge_ss(__m128 __a, __m1
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpgt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpgt_ps(__a, __b);
}
@@ -75,7 +84,10 @@ __m128 test_mm_cmpgt_ss(__m128 __a, __m1
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmple_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmple_ps(__a, __b);
}
@@ -87,7 +99,10 @@ __m128 test_mm_cmple_ss(__m128 __a, __m1
__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmplt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmplt_ps(__a, __b);
}
@@ -99,7 +114,10 @@ __m128 test_mm_cmplt_ss(__m128 __a, __m1
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpneq_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+ // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpneq_ps(__a, __b);
}
@@ -111,7 +129,10 @@ __m128 test_mm_cmpneq_ss(__m128 __a, __m
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnge_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnge_ps(__a, __b);
}
@@ -124,7 +145,10 @@ __m128 test_mm_cmpnge_ss(__m128 __a, __m
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpngt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpngt_ps(__a, __b);
}
@@ -137,7 +161,10 @@ __m128 test_mm_cmpngt_ss(__m128 __a, __m
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnle_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnle_ps(__a, __b);
}
@@ -149,7 +176,10 @@ __m128 test_mm_cmpnle_ss(__m128 __a, __m
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnlt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnlt_ps(__a, __b);
}
@@ -161,7 +191,10 @@ __m128 test_mm_cmpnlt_ss(__m128 __a, __m
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpord_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+ // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpord_ps(__a, __b);
}
@@ -173,7 +206,10 @@ __m128 test_mm_cmpord_ss(__m128 __a, __m
__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpunord_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+ // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpunord_ps(__a, __b);
}
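A small runtime cross-check (hypothetical, not part of the test suite) that
the builtin path and the vector-extension form from the log message produce
identical bits:

#include <xmmintrin.h>
#include <string.h>

typedef float v4sf __attribute__((__vector_size__(16)));
typedef int   v4si __attribute__((__vector_size__(16)));

int main(void) {
  v4sf a = {1.0f, 2.0f, 3.0f, 4.0f};
  v4sf b = {1.0f, 0.0f, 3.0f, 5.0f};
  __m128 viaBuiltin = _mm_cmpeq_ps((__m128)a, (__m128)b);
  v4si viaVectorExt = (a == b); /* all-ones / all-zero i32 lanes */
  /* Both should hold the same 128-bit mask; exit nonzero on mismatch. */
  return memcmp(&viaBuiltin, &viaVectorExt, sizeof viaBuiltin) != 0;
}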
Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=272840&r1=272839&r2=272840&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Wed Jun 15 16:20:04 2016
@@ -183,7 +183,10 @@ __m128i test_mm_cmpeq_epi32(__m128i A, _
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpeq_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpeq_pd(A, B);
}
@@ -195,7 +198,10 @@ __m128d test_mm_cmpeq_sd(__m128d A, __m1
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpge_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpge_pd(A, B);
}
@@ -229,7 +235,10 @@ __m128i test_mm_cmpgt_epi32(__m128i A, _
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpgt_pd(A, B);
}
@@ -245,7 +254,10 @@ __m128d test_mm_cmpgt_sd(__m128d A, __m1
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmple_pd(A, B);
}
@@ -275,7 +287,10 @@ __m128i test_mm_cmplt_epi32(__m128i A, _
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmplt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmplt_pd(A, B);
}
@@ -287,7 +302,10 @@ __m128d test_mm_cmplt_sd(__m128d A, __m1
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpneq_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+ // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpneq_pd(A, B);
}
@@ -299,7 +317,10 @@ __m128d test_mm_cmpneq_sd(__m128d A, __m
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnge_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnge_pd(A, B);
}
@@ -315,7 +336,10 @@ __m128d test_mm_cmpnge_sd(__m128d A, __m
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpngt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpngt_pd(A, B);
}
@@ -331,7 +355,10 @@ __m128d test_mm_cmpngt_sd(__m128d A, __m
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnle_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnle_pd(A, B);
}
@@ -343,7 +370,10 @@ __m128d test_mm_cmpnle_sd(__m128d A, __m
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnlt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnlt_pd(A, B);
}
@@ -355,7 +385,10 @@ __m128d test_mm_cmpnlt_sd(__m128d A, __m
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpord_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+ // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpord_pd(A, B);
}
@@ -367,7 +400,10 @@ __m128d test_mm_cmpord_sd(__m128d A, __m
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpunord_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+ // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpunord_pd(A, B);
}
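Note why the *ss/*sd builtins above still call the target intrinsic instead
of lowering to fcmp: they compare only element 0 and pass the upper elements
of the first operand through unchanged, which a plain vector fcmp cannot
express. A hypothetical scalar model of _mm_cmpeq_ss (names assumed, not
from the commit):

#include <xmmintrin.h>
#include <string.h>

static __m128 cmpeq_ss_model(__m128 a, __m128 b) {
  /* Lane 0: all-ones if equal, else zero; lanes 1-3: copied from 'a'. */
  unsigned bits = (_mm_cvtss_f32(a) == _mm_cvtss_f32(b)) ? 0xFFFFFFFFu : 0u;
  float mask;
  memcpy(&mask, &bits, sizeof mask);
  return _mm_move_ss(a, _mm_set_ss(mask));
}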