r335339 - [X86] Lower _mm[256|512]_cmp[.]_mask intrinsics to native llvm IR
Gabor Buella via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 22 04:59:16 PDT 2018
Author: gbuella
Date: Fri Jun 22 04:59:16 2018
New Revision: 335339
URL: http://llvm.org/viewvc/llvm-project?rev=335339&view=rev
Log:
[X86] Lower _mm[256|512]_cmp[.]_mask intrinsics to native llvm IR
Summary:
Lowering some vector comparison builtins to fcmp IR instructions.
This ignores the signaling behaviour specified in the predicate
argument of said builtins.
Affected AVX512 builtins:
__builtin_ia32_cmpps128_mask
__builtin_ia32_cmpps256_mask
__builtin_ia32_cmpps512_mask
__builtin_ia32_cmppd128_mask
__builtin_ia32_cmppd256_mask
__builtin_ia32_cmppd512_mask
Reviewers: craig.topper, uriel.k, RKSimon, andrew.w.kaylor, spatel, scanon, efriedma
Reviewed By: craig.topper, spatel, efriedma
Differential Revision: https://reviews.llvm.org/D45616
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/avx-cmp-builtins.c
cfe/trunk/test/CodeGen/avx2-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 22 04:59:16 2018
@@ -10120,44 +10120,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
return Builder.CreateExtractValue(Call, 1);
}
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
- Value *MaskIn = Ops[3];
- Ops.erase(&Ops[3]);
-
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps128_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
- break;
- case X86::BI__builtin_ia32_cmpps256_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmpps512_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
- break;
- case X86::BI__builtin_ia32_cmppd128_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
- break;
- case X86::BI__builtin_ia32_cmppd256_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
- break;
- case X86::BI__builtin_ia32_cmppd512_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
- break;
- }
-
- Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
- }
-
- // SSE packed comparison intrinsics
+ // packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
return getVectorFCmpIR(CmpInst::FCMP_OEQ);
@@ -10185,64 +10148,84 @@ Value *CodeGenFunction::EmitX86BuiltinEx
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask: {
+ // Lowering vector comparisons to fcmp instructions, while
+ // ignoring signalling behaviour requested
+ // ignoring rounding mode requested
+ // This is only possible as long as FENV_ACCESS is not implemented.
+ // See also: https://reviews.llvm.org/D45616
+
+ // The third argument is the comparison condition, an integer in the
+ // range [0, 31]
unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
- // If this one of the SSE immediates, we can use native IR.
- if (CC < 8) {
- FCmpInst::Predicate Pred;
- switch (CC) {
- case 0: Pred = FCmpInst::FCMP_OEQ; break;
- case 1: Pred = FCmpInst::FCMP_OLT; break;
- case 2: Pred = FCmpInst::FCMP_OLE; break;
- case 3: Pred = FCmpInst::FCMP_UNO; break;
- case 4: Pred = FCmpInst::FCMP_UNE; break;
- case 5: Pred = FCmpInst::FCMP_UGE; break;
- case 6: Pred = FCmpInst::FCMP_UGT; break;
- case 7: Pred = FCmpInst::FCMP_ORD; break;
- }
- return getVectorFCmpIR(Pred);
+
+ // Lowering to IR fcmp instruction.
+ // Ignoring requested signaling behaviour,
+ // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x11: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x12: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x13: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x14: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x15: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x16: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x17: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x19: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
+ // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+ // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+ case 0x0b: // FALSE_OQ
+ case 0x1b: // FALSE_OS
+ return llvm::Constant::getNullValue(ConvertType(E->getType()));
+ case 0x0f: // TRUE_UQ
+ case 0x1f: // TRUE_US
+ return llvm::Constant::getAllOnesValue(ConvertType(E->getType()));
+
+ default: llvm_unreachable("Unhandled CC");
}
- // We can't handle 8-31 immediates with native IR, use the intrinsic.
- // Except for predicates that create constants.
- Intrinsic::ID ID;
+ // Builtins without the _mask suffix return a vector of integers
+ // of the same width as the input vectors
switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps:
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpps256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
- llvm::Constant::getNullValue(Builder.getInt32Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd:
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmppd256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
- llvm::Constant::getNullValue(Builder.getInt64Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_pd_256;
- break;
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
+ }
+ default:
+ return getVectorFCmpIR(Pred);
}
-
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
// SSE scalar comparison intrinsics
Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri Jun 22 04:59:16 2018
@@ -214,25 +214,25 @@ __m256 test_mm_ceil_ps(__m256 x) {
__m128d test_mm_cmp_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 13)
+ // CHECK: [[CMP:%.*]] = fcmp oge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(A, B, _CMP_GE_OS);
}
__m256d test_mm256_cmp_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_cmp_pd
- // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 13)
+ // CHECK: [[CMP:%.*]] = fcmp oge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(A, B, _CMP_GE_OS);
}
__m128 test_mm_cmp_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cmp_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 13)
+ // CHECK: [[CMP:%.*]] = fcmp oge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(A, B, _CMP_GE_OS);
}
__m256 test_mm256_cmp_ps(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_cmp_ps
- // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 13)
+ // CHECK: [[CMP:%.*]] = fcmp oge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(A, B, _CMP_GE_OS);
}
@@ -1401,69 +1401,117 @@ __m256i test_mm256_zextsi128_si256(__m12
double test_mm256_cvtsd_f64(__m256d __a)
{
- // CHECK-LABEL: @test_mm256_cvtsd_f64
- // CHECK: extractelement <4 x double> %{{.*}}, i32 0
- return _mm256_cvtsd_f64(__a);
+ // CHECK-LABEL: @test_mm256_cvtsd_f64
+ // CHECK: extractelement <4 x double> %{{.*}}, i32 0
+ return _mm256_cvtsd_f64(__a);
}
int test_mm256_cvtsi256_si32(__m256i __a)
{
- // CHECK-LABEL: @test_mm256_cvtsi256_si32
- // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
- return _mm256_cvtsi256_si32(__a);
+ // CHECK-LABEL: @test_mm256_cvtsi256_si32
+ // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
+ return _mm256_cvtsi256_si32(__a);
}
float test_mm256_cvtss_f32(__m256 __a)
{
- // CHECK-LABEL: @test_mm256_cvtss_f32
- // CHECK: extractelement <8 x float> %{{.*}}, i32 0
- return _mm256_cvtss_f32(__a);
+ // CHECK-LABEL: @test_mm256_cvtss_f32
+ // CHECK: extractelement <8 x float> %{{.*}}, i32 0
+ return _mm256_cvtss_f32(__a);
}
__m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_true
- // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
- return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
+ // CHECK-LABEL: @test_mm256_cmp_ps_true
+ // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
+ return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
}
__m256d test_mm256_cmp_pd_true(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_true
- // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
+ // CHECK-LABEL: @test_mm256_cmp_pd_true
+ // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
}
__m256 test_mm256_cmp_ps_false(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_false
- // CHECK: ret <8 x float> zeroinitializer
- return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
+ // CHECK-LABEL: @test_mm256_cmp_ps_false
+ // CHECK: ret <8 x float> zeroinitializer
+ return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
}
__m256d test_mm256_cmp_pd_false(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_false
- // CHECK: ret <4 x double> zeroinitializer
+ // CHECK-LABEL: @test_mm256_cmp_pd_false
+ // CHECK: ret <4 x double> zeroinitializer
return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
}
__m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_strue
- // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
- return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
+ // CHECK-LABEL: @test_mm256_cmp_ps_strue
+ // CHECK: ret <8 x float> <float 0xFFFFFFFFE0000000,
+ return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
}
__m256d test_mm256_cmp_pd_strue(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_strue
- // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
+ // CHECK-LABEL: @test_mm256_cmp_pd_strue
+ // CHECK: ret <4 x double> <double 0xFFFFFFFFFFFFFFFF,
return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
}
__m256 test_mm256_cmp_ps_sfalse(__m256 a, __m256 b) {
- // CHECK-LABEL: @test_mm256_cmp_ps_sfalse
- // CHECK: ret <8 x float> zeroinitializer
- return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
+ // CHECK-LABEL: @test_mm256_cmp_ps_sfalse
+ // CHECK: ret <8 x float> zeroinitializer
+ return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
}
__m256d test_mm256_cmp_pd_sfalse(__m256d a, __m256d b) {
- // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
- // CHECK: ret <4 x double> zeroinitializer
+ // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
+ // CHECK: ret <4 x double> zeroinitializer
return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
}
+
+__m128 test_mm_cmp_ps_true(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_ps_true
+ // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
+ return _mm_cmp_ps(a, b, _CMP_TRUE_UQ);
+}
+
+__m128 test_mm_cmp_pd_true(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_pd_true
+ // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
+ return _mm_cmp_pd(a, b, _CMP_TRUE_UQ);
+}
+
+__m128 test_mm_cmp_ps_false(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_ps_false
+ // CHECK: ret <4 x float> zeroinitializer
+ return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m128 test_mm_cmp_pd_false(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_pd_false
+ // CHECK: ret <4 x float> zeroinitializer
+ return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m128 test_mm_cmp_ps_strue(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_ps_strue
+ // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
+ return _mm_cmp_ps(a, b, _CMP_TRUE_US);
+}
+
+__m128 test_mm_cmp_pd_strue(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_pd_strue
+ // CHECK: ret <4 x float> <float 0xFFFFFFFFE0000000,
+ return _mm_cmp_pd(a, b, _CMP_TRUE_US);
+}
+
+__m128 test_mm_cmp_ps_sfalse(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_ps_sfalse
+ // CHECK: ret <4 x float> zeroinitializer
+ return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m128 test_mm_cmp_pd_sfalse(__m128 a, __m128 b) {
+ // CHECK-LABEL: @test_mm_cmp_pd_sfalse
+ // CHECK: ret <4 x float> zeroinitializer
+ return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
+}
Modified: cfe/trunk/test/CodeGen/avx-cmp-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-cmp-builtins.c?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-cmp-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-cmp-builtins.c Fri Jun 22 04:59:16 2018
@@ -8,30 +8,6 @@
// Test LLVM IR codegen of cmpXY instructions
//
-__m128d test_cmp_pd(__m128d a, __m128d b) {
- // Expects that the third argument in LLVM IR is immediate expression
- // CHECK: @llvm.x86.sse2.cmp.pd({{.*}}, i8 13)
- return _mm_cmp_pd(a, b, _CMP_GE_OS);
-}
-
-__m128d test_cmp_ps(__m128 a, __m128 b) {
- // Expects that the third argument in LLVM IR is immediate expression
- // CHECK: @llvm.x86.sse.cmp.ps({{.*}}, i8 13)
- return _mm_cmp_ps(a, b, _CMP_GE_OS);
-}
-
-__m256d test_cmp_pd256(__m256d a, __m256d b) {
- // Expects that the third argument in LLVM IR is immediate expression
- // CHECK: @llvm.x86.avx.cmp.pd.256({{.*}}, i8 13)
- return _mm256_cmp_pd(a, b, _CMP_GE_OS);
-}
-
-__m256d test_cmp_ps256(__m256 a, __m256 b) {
- // Expects that the third argument in LLVM IR is immediate expression
- // CHECK: @llvm.x86.avx.cmp.ps.256({{.*}}, i8 13)
- return _mm256_cmp_ps(a, b, _CMP_GE_OS);
-}
-
__m128d test_cmp_sd(__m128d a, __m128d b) {
// Expects that the third argument in LLVM IR is immediate expression
// CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13)
Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Fri Jun 22 04:59:16 2018
@@ -612,9 +612,7 @@ __m128d test_mm_mask_i64gather_pd(__m128
__m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_pd
- // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
- // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
- // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
+ // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
// CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
return _mm256_i64gather_pd(b, c, 2);
}
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Jun 22 04:59:16 2018
@@ -1279,245 +1279,317 @@ __m512 test_mm512_unpacklo_ps(__m512 a,
__mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmp_round_ps_mask
- // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
}
__mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
- // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
// CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
}
__mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmp_ps_mask
- // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmp_ps_mask(a, b, 0);
}
+__mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) {
+ // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i16 -1
+ return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) {
+ // CHECK-LABEL: @test_mm512_cmp_ps_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i16 -1
+ return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) {
+ // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i16 0
+ return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) {
+ // CHECK-LABEL: @test_mm512_cmp_ps_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i16 0
+ return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
__mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
- // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
// CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmp_ps_mask(m, a, b, 0);
}
__mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmp_round_pd_mask
- // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
}
__mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
// CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
}
__mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmp_pd_mask
- // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmp_pd_mask(a, b, 0);
}
+__mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) {
+ // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) {
+ // CHECK-LABEL: @test_mm512_cmp_pd_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) {
+ // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) {
+ // CHECK-LABEL: @test_mm512_cmp_pd_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
__mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
// CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmp_pd_mask(m, a, b, 0);
}
__mmask8 test_mm512_cmpeq_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpeq_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpeq_pd_mask(a, b);
}
__mmask8 test_mm512_cmpeq_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpeq_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpeq_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpeq_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpeq_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpeq_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpeq_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpeq_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpeq_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmple_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmple_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmple_pd_mask(a, b);
}
__mmask8 test_mm512_cmple_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmple_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp ole <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmple_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmple_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmple_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmple_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmple_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmple_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmple_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmplt_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmplt_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmplt_pd_mask(a, b);
}
__mmask8 test_mm512_cmplt_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmplt_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmplt_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmplt_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmplt_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp olt <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmplt_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmplt_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmplt_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmplt_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmpneq_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpneq_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpneq_pd_mask(a, b);
}
__mmask8 test_mm512_cmpneq_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpneq_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpneq_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpneq_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpneq_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpneq_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpneq_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpneq_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpneq_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmpnle_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpnle_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp ugt <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpnle_pd_mask(a, b);
}
__mmask8 test_mm512_cmpnle_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpnle_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpnle_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpnle_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpnle_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpnle_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpnle_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpnle_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpnle_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmpnlt_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpnlt_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp uge <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpnlt_pd_mask(a, b);
}
__mmask8 test_mm512_cmpnlt_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpnlt_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpnlt_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpnlt_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpnlt_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpnlt_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpnlt_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpnlt_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpnlt_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmpord_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpord_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpord_pd_mask(a, b);
}
__mmask8 test_mm512_cmpord_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpord_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpord_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpord_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpord_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpord_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpord_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpord_ps_mask(k, a, b);
}
__mmask8 test_mm512_cmpunord_pd_mask(__m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_cmpunord_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}}
return _mm512_cmpunord_pd_mask(a, b);
}
__mmask8 test_mm512_cmpunord_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_cmpunord_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}}
return _mm512_cmpunord_ps_mask(a, b);
}
__mmask8 test_mm512_mask_cmpunord_pd_mask(__mmask8 k, __m512d a, __m512d b) {
// CHECK-LABEL: @test_mm512_mask_cmpunord_pd_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+ // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpunord_pd_mask(k, a, b);
}
__mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
// CHECK-LABEL: @test_mm512_mask_cmpunord_ps_mask
- // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+ // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> [[CMP]], {{.*}}
return _mm512_mask_cmpunord_ps_mask(k, a, b);
}
Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=335339&r1=335338&r2=335339&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Fri Jun 22 04:59:16 2018
@@ -1073,53 +1073,168 @@ __m128i test_mm_maskz_xor_epi64 (__mmask
__mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_cmp_ps_mask
- // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
+ // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
}
+__mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 __A, __m256 __B) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_true_us(__m256 __A, __m256 __B) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 __A, __m256 __B) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_os(__m256 __A, __m256 __B) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm256_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
+}
+
__mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
- // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
- // CHECK: and <8 x i1> [[CMP]], {{.*}}
+ // CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_ps_mask(m, __A, __B, 0);
}
__mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_cmp_ps_mask
- // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
+ // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
}
+__mmask8 test_mm_cmp_ps_mask_true_uq(__m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_cmp_ps_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_true_us(__m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_cmp_ps_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_oq(__m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_cmp_ps_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_os(__m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_cmp_ps_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm_cmp_ps_mask(__A, __B, _CMP_FALSE_OS);
+}
+
__mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_cmp_ps_mask
- // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
- // CHECK: and <4 x i1> [[CMP]], {{.*}}
+ // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_ps_mask(m, __A, __B, 0);
}
__mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_cmp_pd_mask
- // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
+ // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
}
+__mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d __A, __m256d __B) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_true_us(__m256d __A, __m256d __B) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d __A, __m256d __B) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_os(__m256d __A, __m256d __B) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm256_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
+}
+
__mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
- // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
- // CHECK: and <4 x i1> [[CMP]], {{.*}}
+ // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: and <4 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pd_mask(m, __A, __B, 0);
}
__mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_cmp_pd_mask
- // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
+ // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
}
+__mmask8 test_mm_cmp_pd_mask_true_uq(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_cmp_pd_mask_true_uq
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_true_us(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_cmp_pd_mask_true_us
+ // CHECK-NOT: call
+ // CHECK: ret i8 -1
+ return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_oq(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_cmp_pd_mask_false_oq
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_os(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_cmp_pd_mask_false_os
+ // CHECK-NOT: call
+ // CHECK: ret i8 0
+ return (__mmask8)_mm_cmp_pd_mask(__A, __B, _CMP_FALSE_OS);
+}
+
__mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_cmp_pd_mask
- // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
- // CHECK: and <2 x i1> [[CMP]], {{.*}}
+ // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+ // CHECK: and <2 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pd_mask(m, __A, __B, 0);
}
More information about the cfe-commits mailing list