[llvm] r324646 - [X86] Remove kortest intrinsics and replace with native IR.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 8 12:16:07 PST 2018


Author: ctopper
Date: Thu Feb  8 12:16:06 2018
New Revision: 324646

URL: http://llvm.org/viewvc/llvm-project?rev=324646&view=rev
Log:
[X86] Remove kortest intrinsics and replace with native IR.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Feb  8 12:16:06 2018
@@ -3723,18 +3723,6 @@ let TargetPrefix = "x86" in {  // All in
 //===----------------------------------------------------------------------===//
 // AVX512
 
-// Mask ops
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  // Mask instructions
-  // 16-bit mask
-  def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
-              Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
-                        [IntrNoMem]>;
-  def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
-              Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
-                        [IntrNoMem]>;
-}
-
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Feb  8 12:16:06 2018
@@ -115,6 +115,8 @@ static bool ShouldUpgradeX86Intrinsic(Fu
       Name == "avx512.kor.w" || // Added in 7.0
       Name == "avx512.kxor.w" || // Added in 7.0
       Name == "avx512.kxnor.w" || // Added in 7.0
+      Name == "avx512.kortestc.w" || // Added in 7.0
+      Name == "avx512.kortestz.w" || // Added in 7.0
       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
       Name.startswith("avx2.pmax") || // Added in 3.9
       Name.startswith("avx2.pmin") || // Added in 3.9
@@ -1156,6 +1158,19 @@ void llvm::UpgradeIntrinsicCall(CallInst
       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
       Rep = Builder.CreateNot(Rep);
       Rep = Builder.CreateBitCast(Rep, CI->getType());
+    } else if (IsX86 &&
+               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
+      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+      Rep = Builder.CreateOr(LHS, RHS);
+      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
+      Value *C;
+      if (Name[14] == 'c')
+        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
+      else
+        C = ConstantInt::getNullValue(Builder.getInt16Ty());
+      Rep = Builder.CreateICmpEQ(Rep, C);
+      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
       Type *I32Ty = Type::getInt32Ty(C);
       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb  8 12:16:06 2018
@@ -20561,16 +20561,6 @@ SDValue X86TargetLowering::LowerINTRINSI
     SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
   }
-  case Intrinsic::x86_avx512_kortestz_w:
-  case Intrinsic::x86_avx512_kortestc_w: {
-    X86::CondCode X86CC =
-        (IntNo == Intrinsic::x86_avx512_kortestz_w) ? X86::COND_E : X86::COND_B;
-    SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
-    SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
-    SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
-    SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
-    return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
-  }
 
   case Intrinsic::x86_sse42_pcmpistria128:
   case Intrinsic::x86_sse42_pcmpestria128:

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Thu Feb  8 12:16:06 2018
@@ -55,6 +55,104 @@ entry:
   ret i16 %13
 }
 
+define i32 @test_mm512_kortestc(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
+; X32-LABEL: test_mm512_kortestc:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $64, %esp
+; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
+; X32-NEXT:    korw %k0, %k1, %k0
+; X32-NEXT:    kmovw %k0, %eax
+; X32-NEXT:    cmpw $-1, %ax
+; X32-NEXT:    sete %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_kortestc:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; X64-NEXT:    korw %k0, %k1, %k0
+; X64-NEXT:    kmovw %k0, %eax
+; X64-NEXT:    cmpw $-1, %ax
+; X64-NEXT:    sete %al
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %__C to <16 x i32>
+  %4 = bitcast <8 x i64> %__D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = or <16 x i1> %5, %2
+  %7 = bitcast <16 x i1> %6 to i16
+  %8 = icmp eq i16 %7, -1
+  %9 = zext i1 %8 to i32
+  ret i32 %9
+}
+
+define i32 @test_mm512_kortestz(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
+; X32-LABEL: test_mm512_kortestz:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $64, %esp
+; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
+; X32-NEXT:    korw %k0, %k1, %k0
+; X32-NEXT:    kmovw %k0, %eax
+; X32-NEXT:    cmpw $0, %ax
+; X32-NEXT:    sete %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_kortestz:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; X64-NEXT:    korw %k0, %k1, %k0
+; X64-NEXT:    kmovw %k0, %eax
+; X64-NEXT:    cmpw $0, %ax
+; X64-NEXT:    sete %al
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %__C to <16 x i32>
+  %4 = bitcast <8 x i64> %__D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = or <16 x i1> %5, %2
+  %7 = bitcast <16 x i1> %6 to i16
+  %8 = icmp eq i16 %7, 0
+  %9 = zext i1 %8 to i32
+  ret i32 %9
+}
+
 define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
 ; X32-LABEL: test_mm512_shuffle_f32x4:
 ; X32:       # %bb.0: # %entry

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Thu Feb  8 12:16:06 2018
@@ -3832,3 +3832,48 @@ define i16 @test_kxor(i16 %a0, i16 %a1)
   ret i16 %t2
 }
 
+declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
+define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
+; CHECK-LABEL: test_kortestz:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    kortestw %k1, %k0
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %A to <16 x i32>
+  %1 = bitcast <8 x i64> %B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %C to <16 x i32>
+  %4 = bitcast <8 x i64> %D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = bitcast <16 x i1> %2 to i16
+  %7 = bitcast <16 x i1> %5 to i16
+  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
+define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
+; CHECK-LABEL: test_kortestc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    kortestw %k1, %k0
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %A to <16 x i32>
+  %1 = bitcast <8 x i64> %B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %C to <16 x i32>
+  %4 = bitcast <8 x i64> %D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = bitcast <16 x i1> %2 to i16
+  %7 = bitcast <16 x i1> %5 to i16
+  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %6, i16 %7)
+  ret i32 %res
+}

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=324646&r1=324645&r2=324646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Thu Feb  8 12:16:06 2018
@@ -1,34 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
-define i32 @test_kortestz(i16 %a0, i16 %a1) {
-; CHECK-LABEL: test_kortestz:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %esi, %k0
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    kortestw %k0, %k1
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    retq
-  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
-  ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
-define i32 @test_kortestc(i16 %a0, i16 %a1) {
-; CHECK-LABEL: test_kortestc:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %esi, %k0
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    kortestw %k0, %k1
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    retq
-  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
-  ret i32 %res
-}
-
 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
 ; CHECK-LABEL: test_rcp_ps_512:
 ; CHECK:       ## %bb.0:




More information about the llvm-commits mailing list