[llvm] r350374 - [X86] Add post-isel peephole to fold KAND+KORTEST into KTEST if only the zero flag is used.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 3 16:10:58 PST 2019


Author: ctopper
Date: Thu Jan  3 16:10:58 2019
New Revision: 350374

URL: http://llvm.org/viewvc/llvm-project?rev=350374&view=rev
Log:
[X86] Add post-isel peephole to fold KAND+KORTEST into KTEST if only the zero flag is used.

Doing this late so we will prefer to fold the AND into a masked comparison first. That can be better for the live range of the mask register.

Differential Revision: https://reviews.llvm.org/D56246

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=350374&r1=350373&r2=350374&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Jan  3 16:10:58 2019
@@ -948,6 +948,41 @@ void X86DAGToDAGISel::PostprocessISelDAG
       }
     }
 
+    // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
+    // used. We're doing this late so we can prefer to fold the AND into masked
+    // comparisons. Doing that can be better for the live range of the mask
+    // register.
+    if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
+         Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
+        N->getOperand(0) == N->getOperand(1) &&
+        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+        N->getOperand(0).isMachineOpcode() &&
+        onlyUsesZeroFlag(SDValue(N, 0))) {
+      SDValue And = N->getOperand(0);
+      unsigned N0Opc = And.getMachineOpcode();
+      // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
+      // KAND instructions and KTEST use the same ISA feature.
+      if (N0Opc == X86::KANDBrr ||
+          (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
+          N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
+        unsigned NewOpc;
+        switch (Opc) {
+        default: llvm_unreachable("Unexpected opcode!");
+        case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
+        case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
+        case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
+        case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
+        }
+        MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+                                                      MVT::i32,
+                                                      And.getOperand(0),
+                                                      And.getOperand(1));
+        ReplaceUses(N, KTest);
+        MadeChange = true;
+        continue;
+      }
+    }
+
     // Attempt to remove vectors moves that were inserted to zero upper bits.
     if (Opc != TargetOpcode::SUBREG_TO_REG)
       continue;

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350374&r1=350373&r2=350374&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Jan  3 16:10:58 2019
@@ -3519,8 +3519,7 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; SKX-NEXT:    vptestnmd %ymm2, %ymm2, %k1
 ; SKX-NEXT:    vptestnmd %ymm3, %ymm3, %k2
 ; SKX-NEXT:    korb %k2, %k1, %k1
-; SKX-NEXT:    kandb %k1, %k0, %k0
-; SKX-NEXT:    kortestb %k0, %k0
+; SKX-NEXT:    ktestb %k1, %k0
 ; SKX-NEXT:    je LBB71_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
@@ -3574,8 +3573,7 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k3
 ; AVX512DQ-NEXT:    korb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    korb %k3, %k2, %k1
-; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
-; AVX512DQ-NEXT:    kortestb %k0, %k0
+; AVX512DQ-NEXT:    ktestb %k1, %k0
 ; AVX512DQ-NEXT:    je LBB71_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
@@ -3597,8 +3595,7 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; X86-NEXT:    vptestnmd %ymm2, %ymm2, %k1
 ; X86-NEXT:    vptestnmd %ymm3, %ymm3, %k2
 ; X86-NEXT:    korb %k2, %k1, %k1
-; X86-NEXT:    kandb %k1, %k0, %k0
-; X86-NEXT:    kortestb %k0, %k0
+; X86-NEXT:    ktestb %k1, %k0
 ; X86-NEXT:    je LBB71_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
@@ -3663,8 +3660,7 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1
 ; SKX-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; SKX-NEXT:    korb %k2, %k1, %k1
-; SKX-NEXT:    kandb %k1, %k0, %k0
-; SKX-NEXT:    kortestb %k0, %k0
+; SKX-NEXT:    ktestb %k1, %k0
 ; SKX-NEXT:    je LBB72_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
@@ -3710,8 +3706,7 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; AVX512DQ-NEXT:    vptestnmq %zmm2, %zmm2, %k1
 ; AVX512DQ-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; AVX512DQ-NEXT:    korb %k2, %k1, %k1
-; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
-; AVX512DQ-NEXT:    kortestb %k0, %k0
+; AVX512DQ-NEXT:    ktestb %k1, %k0
 ; AVX512DQ-NEXT:    je LBB72_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
@@ -3733,8 +3728,7 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; X86-NEXT:    vptestnmq %zmm2, %zmm2, %k1
 ; X86-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; X86-NEXT:    korb %k2, %k1, %k1
-; X86-NEXT:    kandb %k1, %k0, %k0
-; X86-NEXT:    kortestb %k0, %k0
+; X86-NEXT:    ktestb %k1, %k0
 ; X86-NEXT:    je LBB72_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
@@ -3765,28 +3759,95 @@ exit:
 }
 
 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
-; CHECK-LABEL: ktest_5:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT:    korw %k1, %k0, %k0
-; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
-; CHECK-NEXT:    vptestnmd %zmm3, %zmm3, %k2
-; CHECK-NEXT:    korw %k2, %k1, %k1
-; CHECK-NEXT:    kandw %k1, %k0, %k0
-; CHECK-NEXT:    kortestw %k0, %k0
-; CHECK-NEXT:    je LBB73_1
-; CHECK-NEXT:  ## %bb.2: ## %exit
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  LBB73_1: ## %bar
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq _foo
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
+; KNL-LABEL: ktest_5:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; KNL-NEXT:    korw %k2, %k1, %k1
+; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kortestw %k0, %k0
+; KNL-NEXT:    je LBB73_1
+; KNL-NEXT:  ## %bb.2: ## %exit
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB73_1: ## %bar
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    callq _foo
+; KNL-NEXT:    popq %rax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: ktest_5:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; SKX-NEXT:    korw %k1, %k0, %k0
+; SKX-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; SKX-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; SKX-NEXT:    korw %k2, %k1, %k1
+; SKX-NEXT:    ktestw %k1, %k0
+; SKX-NEXT:    je LBB73_1
+; SKX-NEXT:  ## %bb.2: ## %exit
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB73_1: ## %bar
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    callq _foo
+; SKX-NEXT:    popq %rax
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: ktest_5:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
+; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512BW-NEXT:    korw %k1, %k0, %k0
+; AVX512BW-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; AVX512BW-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; AVX512BW-NEXT:    korw %k2, %k1, %k1
+; AVX512BW-NEXT:    kandw %k1, %k0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    je LBB73_1
+; AVX512BW-NEXT:  ## %bb.2: ## %exit
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+; AVX512BW-NEXT:  LBB73_1: ## %bar
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    callq _foo
+; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: ktest_5:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512DQ-NEXT:    korw %k1, %k0, %k0
+; AVX512DQ-NEXT:    vptestnmd %zmm2, %zmm2, %k1
+; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k2
+; AVX512DQ-NEXT:    korw %k2, %k1, %k1
+; AVX512DQ-NEXT:    ktestw %k1, %k0
+; AVX512DQ-NEXT:    je LBB73_1
+; AVX512DQ-NEXT:  ## %bb.2: ## %exit
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+; AVX512DQ-NEXT:  LBB73_1: ## %bar
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq _foo
+; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_5:
 ; X86:       ## %bb.0:
@@ -3798,8 +3859,7 @@ define void @ktest_5(<16 x i32> %w, <16
 ; X86-NEXT:    vptestnmd %zmm2, %zmm2, %k1
 ; X86-NEXT:    vptestnmd %zmm3, %zmm3, %k2
 ; X86-NEXT:    korw %k2, %k1, %k1
-; X86-NEXT:    kandw %k1, %k0, %k0
-; X86-NEXT:    kortestw %k0, %k0
+; X86-NEXT:    ktestw %k1, %k0
 ; X86-NEXT:    je LBB73_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
@@ -3878,8 +3938,7 @@ define void @ktest_6(<32 x i16> %w, <32
 ; SKX-NEXT:    vptestnmw %zmm2, %zmm2, %k1
 ; SKX-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; SKX-NEXT:    kord %k2, %k1, %k1
-; SKX-NEXT:    kandd %k1, %k0, %k0
-; SKX-NEXT:    kortestd %k0, %k0
+; SKX-NEXT:    ktestd %k1, %k0
 ; SKX-NEXT:    je LBB74_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
@@ -3901,8 +3960,7 @@ define void @ktest_6(<32 x i16> %w, <32
 ; AVX512BW-NEXT:    vptestnmw %zmm2, %zmm2, %k1
 ; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; AVX512BW-NEXT:    kord %k2, %k1, %k1
-; AVX512BW-NEXT:    kandd %k1, %k0, %k0
-; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    ktestd %k1, %k0
 ; AVX512BW-NEXT:    je LBB74_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
@@ -3962,8 +4020,7 @@ define void @ktest_6(<32 x i16> %w, <32
 ; X86-NEXT:    vptestnmw %zmm2, %zmm2, %k1
 ; X86-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; X86-NEXT:    kord %k2, %k1, %k1
-; X86-NEXT:    kandd %k1, %k0, %k0
-; X86-NEXT:    kortestd %k0, %k0
+; X86-NEXT:    ktestd %k1, %k0
 ; X86-NEXT:    je LBB74_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
@@ -4066,8 +4123,7 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; SKX-NEXT:    vptestnmb %zmm2, %zmm2, %k1
 ; SKX-NEXT:    vptestnmb %zmm3, %zmm3, %k2
 ; SKX-NEXT:    korq %k2, %k1, %k1
-; SKX-NEXT:    kandq %k1, %k0, %k0
-; SKX-NEXT:    kortestq %k0, %k0
+; SKX-NEXT:    ktestq %k1, %k0
 ; SKX-NEXT:    je LBB75_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
@@ -4089,8 +4145,7 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; AVX512BW-NEXT:    vptestnmb %zmm2, %zmm2, %k1
 ; AVX512BW-NEXT:    vptestnmb %zmm3, %zmm3, %k2
 ; AVX512BW-NEXT:    korq %k2, %k1, %k1
-; AVX512BW-NEXT:    kandq %k1, %k0, %k0
-; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    ktestq %k1, %k0
 ; AVX512BW-NEXT:    je LBB75_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax




More information about the llvm-commits mailing list