[llvm] r340869 - [X86] Add intrinsics for KADD instructions
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 28 12:22:55 PDT 2018
Author: ctopper
Date: Tue Aug 28 12:22:55 2018
New Revision: 340869
URL: http://llvm.org/viewvc/llvm-project?rev=340869&view=rev
Log:
[X86] Add intrinsics for KADD instructions
These are intrinsics for supporting kadd builtins in clang. These builtins are already in gcc to implement intrinsics from icc, though they are missing from the Intel Intrinsics Guide.
This instruction adds two mask registers together as if they were scalars rather than vXi1 vectors. We might be able to get away with a bitcast to scalar and a normal add instruction, but that would require DAG combine smarts in the backend to recognize add+bitcast. For now I'd prefer to go with the easiest implementation so we can get these builtins into clang with good codegen.
Differential Revision: https://reviews.llvm.org/D51370
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=340869&r1=340868&r2=340869&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Aug 28 12:22:55 2018
@@ -2763,6 +2763,18 @@ let TargetPrefix = "x86" in { // All in
//===----------------------------------------------------------------------===//
// AVX512
+// Mask ops
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_kadd_b : GCCBuiltin<"__builtin_ia32_kaddqi">,
+ Intrinsic<[llvm_v8i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_w : GCCBuiltin<"__builtin_ia32_kaddhi">,
+ Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_d : GCCBuiltin<"__builtin_ia32_kaddsi">,
+ Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_q : GCCBuiltin<"__builtin_ia32_kadddi">,
+ Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+}
+
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=340869&r1=340868&r2=340869&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Aug 28 12:22:55 2018
@@ -404,6 +404,10 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_b, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=340869&r1=340868&r2=340869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Tue Aug 28 12:22:55 2018
@@ -2,6 +2,62 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind {
+; CHECK-LABEL: test_int_x86_avx512_kadd_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
+; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
+; CHECK-NEXT: kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = icmp ne <32 x i16> %A, zeroinitializer
+ %1 = icmp ne <32 x i16> %B, zeroinitializer
+ %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1)
+ %3 = bitcast <32 x i1> %2 to i32
+ %4 = icmp eq i32 %3, 0
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+}
+declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>)
+
+define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind {
+; X86-LABEL: test_int_x86_avx512_kadd_q:
+; X86: # %bb.0: # %entry
+; X86-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; X86-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; X86-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
+; X86-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; X86-NEXT: kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1]
+; X86-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_kadd_q:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; X64-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; X64-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
+; X64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; X64-NEXT: kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0]
+; X64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = icmp ne <64 x i8> %A, zeroinitializer
+ %1 = icmp ne <64 x i8> %B, zeroinitializer
+ %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1)
+ %3 = bitcast <64 x i1> %2 to i64
+ %4 = icmp eq i64 %3, 0
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+}
+declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
+
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=340869&r1=340868&r2=340869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Tue Aug 28 12:22:55 2018
@@ -4,6 +4,50 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512DQ,X64-AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512DQVL,X64-AVX512DQVL
+define i32 @test_int_x86_avx512_kadd_w(<16 x i32> %A, <16 x i32> %B) nounwind {
+; CHECK-LABEL: test_int_x86_avx512_kadd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
+; CHECK-NEXT: kaddw %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4a,0xc1]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: kortestw %k0, %k0 # encoding: [0xc5,0xf8,0x98,0xc0]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = icmp ne <16 x i32> %A, zeroinitializer
+ %1 = icmp ne <16 x i32> %B, zeroinitializer
+ %2 = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> %0, <16 x i1> %1)
+ %3 = bitcast <16 x i1> %2 to i16
+ %4 = icmp eq i16 %3, 0
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+}
+declare <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1>, <16 x i1>)
+
+define i32 @test_int_x86_avx512_kadd_b(<8 x i64> %A, <8 x i64> %B) nounwind {
+; CHECK-LABEL: test_int_x86_avx512_kadd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
+; CHECK-NEXT: kaddb %k1, %k0, %k0 # encoding: [0xc5,0xfd,0x4a,0xc1]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: kortestb %k0, %k0 # encoding: [0xc5,0xf9,0x98,0xc0]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = icmp ne <8 x i64> %A, zeroinitializer
+ %1 = icmp ne <8 x i64> %B, zeroinitializer
+ %2 = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> %0, <8 x i1> %1)
+ %3 = bitcast <8 x i1> %2 to i8
+ %4 = icmp eq i8 %3, 0
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+}
+declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)
+
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
More information about the llvm-commits
mailing list