r340719 - [X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sun Aug 26 23:20:22 PDT 2018
Author: ctopper
Date: Sun Aug 26 23:20:22 2018
New Revision: 340719
URL: http://llvm.org/viewvc/llvm-project?rev=340719&view=rev
Log:
[X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers.
This also adds a second intrinsic name for the 16-bit mask versions.
These intrinsics match gcc and icc. They just aren't published in the Intel Intrinsics Guide so I only recently found they existed.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun Aug 26 23:20:22 2018
@@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfd
TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf")
TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps5
TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Aug 26 23:20:22 2018
@@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeG
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- unsigned NumElts, ArrayRef<Value *> Ops,
+ ArrayRef<Value *> Ops,
bool InvertLHS = false) {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
@@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
case X86::BI__builtin_ia32_kortestchi:
case X86::BI__builtin_ia32_kortestzhi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
Value *C;
if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
@@ -10023,26 +10024,45 @@ Value *CodeGenFunction::EmitX86BuiltinEx
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
+ case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops);
+ case X86::BI__builtin_ia32_kandnqi:
case X86::BI__builtin_ia32_kandnhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
+ case X86::BI__builtin_ia32_korqi:
case X86::BI__builtin_ia32_korhi:
- return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ case X86::BI__builtin_ia32_kxnorqi:
case X86::BI__builtin_ia32_kxnorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
case X86::BI__builtin_ia32_kxorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
- case X86::BI__builtin_ia32_knothi: {
- Ops[0] = getMaskVecValue(*this, Ops[0], 16);
- return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
- Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Builder.CreateNot(Res),
+ Ops[0]->getType());
}
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi: {
- unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
uint32_t Indices[64];
Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun Aug 26 23:20:22 2018
@@ -35,6 +35,78 @@ typedef unsigned long long __mmask64;
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
+static __inline __mmask32 __DEFAULT_FN_ATTRS
+_knot_mask32(__mmask32 __M)
+{
+ return __builtin_ia32_knotsi(__M);
+}
+
+static __inline __mmask64 __DEFAULT_FN_ATTRS
+_knot_mask64(__mmask64 __M)
+{
+ return __builtin_ia32_knotdi(__M);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kand_mask32(__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kand_mask64(__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kandn_mask32(__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kandn_mask64(__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kor_mask32(__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kor_mask64(__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kxnor_mask32(__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kxnor_mask64(__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kxor_mask32(__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kxor_mask64(__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
+}
+
/* Integer compare */
#define _mm512_cmp_epi8_mask(a, b, p) \
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Sun Aug 26 23:20:22 2018
@@ -30,6 +30,43 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
+
+static __inline __mmask8 __DEFAULT_FN_ATTRS
+_knot_mask8(__mmask8 __M)
+{
+ return __builtin_ia32_knotqi(__M);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kand_mask8(__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kandn_mask8(__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kor_mask8(__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kxnor_mask8(__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_kxor_mask8(__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
+}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
@@ -1257,5 +1294,6 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
(__mmask8)(U))
#undef __DEFAULT_FN_ATTRS512
+#undef __DEFAULT_FN_ATTRS
#endif
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Aug 26 23:20:22 2018
@@ -8369,6 +8369,13 @@ _mm512_kxor (__mmask16 __A, __mmask16 __
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
+#define _kand_mask16 _mm512_kand
+#define _kandn_mask16 _mm512_kandn
+#define _knot_mask16 _mm512_knot
+#define _kor_mask16 _mm512_kor
+#define _kxnor_mask16 _mm512_kxnor
+#define _kxor_mask16 _mm512_kxor
+
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Sun Aug 26 23:20:22 2018
@@ -4,6 +4,136 @@
#include <immintrin.h>
+__mmask32 test_knot_mask32(__mmask32 a) {
+ // CHECK-LABEL: @test_knot_mask32
+ // CHECK: [[IN:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: bitcast <32 x i1> [[NOT]] to i32
+ return _knot_mask32(a);
+}
+
+__mmask64 test_knot_mask64(__mmask64 a) {
+ // CHECK-LABEL: @test_knot_mask64
+ // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: bitcast <64 x i1> [[NOT]] to i64
+ return _knot_mask64(a);
+}
+
+__mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kand_mask32
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = and <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <32 x i1> [[RES]] to i32
+ return _mm512_mask_cmpneq_epu16_mask(_kand_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kand_mask64
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = and <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <64 x i1> [[RES]] to i64
+ return _mm512_mask_cmpneq_epu8_mask(_kand_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kandn_mask32
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = and <32 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <32 x i1> [[RES]] to i32
+ return _mm512_mask_cmpneq_epu16_mask(_kandn_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kandn_mask64
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = and <64 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <64 x i1> [[RES]] to i64
+ return _mm512_mask_cmpneq_epu8_mask(_kandn_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kor_mask32
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <32 x i1> [[RES]] to i32
+ return _mm512_mask_cmpneq_epu16_mask(_kor_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kor_mask64
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <64 x i1> [[RES]] to i64
+ return _mm512_mask_cmpneq_epu8_mask(_kor_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxnor_mask32
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <32 x i1> [[RES]] to i32
+ return _mm512_mask_cmpneq_epu16_mask(_kxnor_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxnor_mask64
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <64 x i1> [[RES]] to i64
+ return _mm512_mask_cmpneq_epu8_mask(_kxnor_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxor_mask32
+ // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: [[RES:%.*]] = xor <32 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <32 x i1> [[RES]] to i32
+ return _mm512_mask_cmpneq_epu16_mask(_kxor_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
+ _mm512_cmpneq_epu16_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask64 test_kxor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxor_mask64
+ // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // CHECK: [[RES:%.*]] = xor <64 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <64 x i1> [[RES]] to i64
+ return _mm512_mask_cmpneq_epu8_mask(_kxor_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
+ _mm512_cmpneq_epu8_mask(__C, __D)),
+ __E, __F);
+}
+
__mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: @test_mm512_cmpeq_epi8_mask
// CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}
Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Sun Aug 26 23:20:22 2018
@@ -3,6 +3,71 @@
#include <immintrin.h>
+__mmask8 test_knot_mask8(__mmask8 a) {
+ // CHECK-LABEL: @test_knot_mask8
+ // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: bitcast <8 x i1> [[NOT]] to i8
+ return _knot_mask8(a);
+}
+
+__mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kand_mask8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = and <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <8 x i1> [[RES]] to i8
+ return _mm512_mask_cmpneq_epu64_mask(_kand_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kandn_mask8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = and <8 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <8 x i1> [[RES]] to i8
+ return _mm512_mask_cmpneq_epu64_mask(_kandn_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kor_mask8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <8 x i1> [[RES]] to i8
+ return _mm512_mask_cmpneq_epu64_mask(_kor_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxnor_mask8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <8 x i1> [[RES]] to i8
+ return _mm512_mask_cmpneq_epu64_mask(_kxnor_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxor_mask8
+ // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: [[RES:%.*]] = xor <8 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <8 x i1> [[RES]] to i8
+ return _mm512_mask_cmpneq_epu64_mask(_kxor_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
+ _mm512_cmpneq_epu64_mask(__C, __D)),
+ __E, __F);
+}
+
__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mullo_epi64
// CHECK: mul <8 x i64>
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=340719&r1=340718&r2=340719&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun Aug 26 23:20:22 2018
@@ -8185,6 +8185,71 @@ __mmask16 test_mm512_kxor(__m512i __A, _
__E, __F);
}
+__mmask16 test_knot_mask16(__mmask16 a) {
+ // CHECK-LABEL: @test_knot_mask16
+ // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: bitcast <16 x i1> [[NOT]] to i16
+ return _knot_mask16(a);
+}
+
+__mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kand_mask16
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <16 x i1> [[RES]] to i16
+ return _mm512_mask_cmpneq_epu32_mask(_kand_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kandn_mask16
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <16 x i1> [[RES]] to i16
+ return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kor_mask16
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <16 x i1> [[RES]] to i16
+ return _mm512_mask_cmpneq_epu32_mask(_kor_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxnor_mask16
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]]
+ // CHECK: bitcast <16 x i1> [[RES]] to i16
+ return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D)),
+ __E, __F);
+}
+
+__mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
+ // CHECK-LABEL: @test_kxor_mask16
+ // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]]
+ // CHECK: bitcast <16 x i1> [[RES]] to i16
+ return _mm512_mask_cmpneq_epu32_mask(_kxor_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D)),
+ __E, __F);
+}
+
void test_mm512_stream_si512(__m512i * __P, __m512i __A) {
// CHECK-LABEL: @test_mm512_stream_si512
// CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal
More information about the cfe-commits
mailing list