[llvm] r218668 - [AVX512] Added intrinsics for VPCMPEQB and VPCMPEQW.
Robert Khasanov
rob.khasanov at gmail.com
Tue Sep 30 04:32:23 PDT 2014
Author: rkhasanov
Date: Tue Sep 30 06:32:22 2014
New Revision: 218668
URL: http://llvm.org/viewvc/llvm-project?rev=218668&view=rev
Log:
[AVX512] Added intrinsics for VPCMPEQB and VPCMPEQW.
Added new operand type for intrinsics (IIT_V64)
Added:
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/Function.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/TableGen/intrinsic-varargs.td
llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=218668&r1=218667&r2=218668&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Sep 30 06:32:22 2014
@@ -3234,6 +3234,23 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}
+// Compares
+let TargetPrefix = "x86" in {
+ // 512-bit
+ def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">,
+ Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pcmpeq_w_512 : GCCBuiltin<"__builtin_ia32_pcmpeqw512_mask">,
+ Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+}
+
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
@@ -3242,13 +3259,6 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty],
Modified: llvm/trunk/lib/IR/Function.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Function.cpp?rev=218668&r1=218667&r2=218668&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Function.cpp (original)
+++ llvm/trunk/lib/IR/Function.cpp Tue Sep 30 06:32:22 2014
@@ -474,7 +474,7 @@ std::string Intrinsic::getName(ID id, Ar
///
/// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter!
enum IIT_Info {
- // Common values should be encoded with 0-15.
+ // Common values should be encoded with 0-16.
IIT_Done = 0,
IIT_I1 = 1,
IIT_I8 = 2,
@@ -489,23 +489,24 @@ enum IIT_Info {
IIT_V8 = 11,
IIT_V16 = 12,
IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
+ IIT_V64 = 14,
+ IIT_PTR = 15,
+ IIT_ARG = 16,
- // Values from 16+ are only encodable with the inefficient encoding.
- IIT_MMX = 16,
- IIT_METADATA = 17,
- IIT_EMPTYSTRUCT = 18,
- IIT_STRUCT2 = 19,
- IIT_STRUCT3 = 20,
- IIT_STRUCT4 = 21,
- IIT_STRUCT5 = 22,
- IIT_EXTEND_ARG = 23,
- IIT_TRUNC_ARG = 24,
- IIT_ANYPTR = 25,
- IIT_V1 = 26,
- IIT_VARARG = 27,
- IIT_HALF_VEC_ARG = 28
+ // Values from 17+ are only encodable with the inefficient encoding.
+ IIT_MMX = 17,
+ IIT_METADATA = 18,
+ IIT_EMPTYSTRUCT = 19,
+ IIT_STRUCT2 = 20,
+ IIT_STRUCT3 = 21,
+ IIT_STRUCT4 = 22,
+ IIT_STRUCT5 = 23,
+ IIT_EXTEND_ARG = 24,
+ IIT_TRUNC_ARG = 25,
+ IIT_ANYPTR = 26,
+ IIT_V1 = 27,
+ IIT_VARARG = 28,
+ IIT_HALF_VEC_ARG = 29
};
@@ -576,6 +577,10 @@ static void DecodeIITType(unsigned &Next
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
DecodeIITType(NextElt, Infos, OutputTable);
return;
+ case IIT_V64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
case IIT_PTR:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
DecodeIITType(NextElt, Infos, OutputTable);
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=218668&r1=218667&r2=218668&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Sep 30 06:32:22 2014
@@ -156,8 +156,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
Added: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=218668&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Tue Sep 30 06:32:22 2014
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
+
+define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
+; CHECK-LABEL: test_pcmpeq_b
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
+ %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
+ ret i64 %res
+}
+
+define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_b
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
+ %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+ ret i64 %res
+}
+
+declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
+
+define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_pcmpeq_w
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
+ %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
+ ret i32 %res
+}
+
+define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_w
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
+ %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
+ ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
Modified: llvm/trunk/test/TableGen/intrinsic-varargs.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/TableGen/intrinsic-varargs.td?rev=218668&r1=218667&r2=218668&view=diff
==============================================================================
--- llvm/trunk/test/TableGen/intrinsic-varargs.td (original)
+++ llvm/trunk/test/TableGen/intrinsic-varargs.td Tue Sep 30 06:32:22 2014
@@ -26,5 +26,5 @@ class Intrinsic<string name, list<LLVMTy
def isVoid : ValueType<0, 56>; // Produces no value
def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
-// CHECK: /* 0 */ 0, 27, 0,
+// CHECK: /* 0 */ 0, 28, 0,
def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>;
Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp?rev=218668&r1=218667&r2=218668&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp Tue Sep 30 06:32:22 2014
@@ -225,7 +225,7 @@ EmitIntrinsicToOverloadTable(const std::
// NOTE: This must be kept in synch with the copy in lib/VMCore/Function.cpp!
enum IIT_Info {
- // Common values should be encoded with 0-15.
+ // Common values should be encoded with 0-16.
IIT_Done = 0,
IIT_I1 = 1,
IIT_I8 = 2,
@@ -240,23 +240,24 @@ enum IIT_Info {
IIT_V8 = 11,
IIT_V16 = 12,
IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
-
- // Values from 16+ are only encodable with the inefficient encoding.
- IIT_MMX = 16,
- IIT_METADATA = 17,
- IIT_EMPTYSTRUCT = 18,
- IIT_STRUCT2 = 19,
- IIT_STRUCT3 = 20,
- IIT_STRUCT4 = 21,
- IIT_STRUCT5 = 22,
- IIT_EXTEND_ARG = 23,
- IIT_TRUNC_ARG = 24,
- IIT_ANYPTR = 25,
- IIT_V1 = 26,
- IIT_VARARG = 27,
- IIT_HALF_VEC_ARG = 28
+ IIT_V64 = 14,
+ IIT_PTR = 15,
+ IIT_ARG = 16,
+
+ // Values from 17+ are only encodable with the inefficient encoding.
+ IIT_MMX = 17,
+ IIT_METADATA = 18,
+ IIT_EMPTYSTRUCT = 19,
+ IIT_STRUCT2 = 20,
+ IIT_STRUCT3 = 21,
+ IIT_STRUCT4 = 22,
+ IIT_STRUCT5 = 23,
+ IIT_EXTEND_ARG = 24,
+ IIT_TRUNC_ARG = 25,
+ IIT_ANYPTR = 26,
+ IIT_V1 = 27,
+ IIT_VARARG = 28,
+ IIT_HALF_VEC_ARG = 29
};
@@ -356,6 +357,7 @@ static void EncodeFixedType(Record *R, s
case 8: Sig.push_back(IIT_V8); break;
case 16: Sig.push_back(IIT_V16); break;
case 32: Sig.push_back(IIT_V32); break;
+ case 64: Sig.push_back(IIT_V64); break;
}
return EncodeFixedValueType(VVT.getVectorElementType().SimpleTy, Sig);
More information about the llvm-commits
mailing list