[llvm] [X86] Support peephole optimization with CCMP instruction (PR #129994)

Feng Zou via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 01:07:54 PDT 2025


https://github.com/fzou1 updated https://github.com/llvm/llvm-project/pull/129994

>From dcc1099af82e9b6d328e9690b5b6492edaa0356a Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Thu, 19 Dec 2024 17:45:38 +0800
Subject: [PATCH 1/2] [X86] Support peephole optimization with CCMP instruction

This extends `optimizeCompareInstr` to re-use previous CCMP results if
the previous comparison was with an immediate that was 1 bigger or
smaller.
Example:
```
CCMP x, 13, 2, 5
...
CCMP x, 12, 2, 5 ; can be removed if we change the SETg
SETg ...         ; x > 12 changed to SETge (x >= 13) & remove the 2nd CCMP
```
---
 .../Target/X86/X86InstrConditionalCompare.td  |   4 +-
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  14 +
 .../CodeGen/X86/apx/optimize-compare-ccmp.mir | 312 ++++++++++++++++++
 3 files changed, 328 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir

diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index 35af8405f1abe..ba8cf6cc3bc67 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
 //===----------------------------------------------------------------------===//
 // CCMP Instructions
 //
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU], isCompare = 1 in {
   def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8,  GR8,  GR8>;
   def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
   def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in {
   def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
 }
 
-let mayLoad = 1 in {
+let mayLoad = 1, isCompare = 1 in {
   let SchedRW = [WriteALU.Folded] in {
     def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
     def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5fe7203c052d8..0b741338934b8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   case X86::CMP32ri:
   case X86::CMP16ri:
   case X86::CMP8ri:
+  case X86::CCMP64ri32:
+  case X86::CCMP32ri:
+  case X86::CCMP16ri:
+  case X86::CCMP8ri:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = 0;
     if (MI.getOperand(1).isImm()) {
@@ -4951,6 +4955,16 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
     }
     return false;
   }
+  case X86::CCMP64ri32:
+  case X86::CCMP32ri:
+  case X86::CCMP16ri:
+  case X86::CCMP8ri:
+    // The CCMP instruction should not be optimized if the scc/dfv in it is not
+    // same as the one in previous CCMP instruction.
+    if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
+        (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+      return false;
+    [[fallthrough]];
   case X86::CMP64ri32:
   case X86::CMP32ri:
   case X86::CMP16ri:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
new file mode 100644
index 0000000000000..1b5ecdfc40e8f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -0,0 +1,312 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s
+
+---
+name: opt_redundant_flags_adjusted_imm_0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 == 1
+    CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags
+    ; CCMP+SETCC   %0 >= 2; CCMP can be removed.
+    CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 >=s 2  -->  %0 >s 1
+    $bl = SETCCr 13, implicit $eflags
+    ; %0 >=u 2  -->  %0 >u 1
+    $bl = SETCCr 3, implicit $eflags
+    ; %0 <s 2  -->  %0 <=s 1
+    $bl = SETCCr 12, implicit $eflags
+    ; %0 <u 2  -->  %0 <=u 1
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 != 42
+    CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 5, implicit $eflags
+    ; CCMP+SETCC   %0 > 41; CCMP can be removed.
+    CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 >s 41  -->  %0 >=s 42
+    $bl = SETCCr 15, implicit $eflags
+    ; %0 >u 41  -->  %0 >=u 42
+    $bl = SETCCr 7, implicit $eflags
+    ; %0 <=s 41  -->  %0 <s 42
+    $bl = SETCCr 14, implicit $eflags
+    ; %0 <=u 41  -->  %0 <u 42
+    $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_0
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 41, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 == 42
+    CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 == 41
+    $bl = SETCCr 4, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_1
+    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+    %0:gr32 = COPY $esi
+    ; CCMP+SETCC   %0 == INT32_MAX
+    CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 <s INT32_MIN
+    $bl = SETCCr 12, implicit $eflags
+
+    CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 12, implicit $eflags
+
+    CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 13, implicit $eflags
+
+    CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 2, implicit $eflags
+
+    CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 3, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_2
+    ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $cx
+    ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr16 = COPY $cx
+    ; CCMP+SETCC   %0 == INT16_MIN
+    CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 >s INT16_MAX
+    $bl = SETCCr 15, implicit $eflags
+
+    CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 15, implicit $eflags
+
+    CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 14, implicit $eflags
+
+    CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 4, implicit $eflags
+
+    CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CCMP should not be removed.
+    CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_3
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 7, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 == 1
+    CCMP64ri32 %0, 1, 2, 7, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags, implicit $eflags
+    ; CCMP+SETCC   %0 >= 2; CCMP should not be removed as the scc and dfv is
+    ; different.
+    CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 13, implicit $eflags
+    $bl = SETCCr 3, implicit $eflags
+    $bl = SETCCr 12, implicit $eflags
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_4
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_4
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 5, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 == 1
+    CCMP64ri32 %0, 1, 5, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags, implicit $eflags
+    ; CCMP+SETCC   %0 >= 2; CCMP should not be removed as the scc and dfv is
+    ; different.
+    CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+    $bl = SETCCr 13, implicit $eflags
+    $bl = SETCCr 3, implicit $eflags
+    $bl = SETCCr 12, implicit $eflags
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_adjusted_imm_multiple_blocks
+body: |
+  ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $eax
+  ; CHECK-NEXT:   CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JCC_1 %bb.2, 15, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   RET 0
+  bb.0:
+    %0:gr32 = COPY $eax
+    CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+    JCC_1 %bb.1, 4, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.1:
+    CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+    JCC_1 %bb.2, 13, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.2:
+    JMP_1 %bb.3
+
+  bb.3:
+    RET 0
+...
+---
+name: opt_adjusted_imm_multiple_blocks_noopt
+body: |
+  ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $eax
+  ; CHECK-NEXT:   CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.1, 4, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CCMP32ri [[COPY]], 21, 2, 5, implicit-def $eflags, implicit $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.2, 13, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $al = SETCCr 4, implicit $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   RET 0
+  bb.0:
+    %0:gr32 = COPY $eax
+    CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+    JCC_1 %bb.1, 4, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.1:
+    CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+    JCC_1 %bb.2, 13, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.2:
+    liveins: $eflags
+    $al = SETCCr 4, implicit $eflags
+
+  bb.3:
+    RET 0
+...

>From 3511b19e20303e860cc8a2cdff7b21330e1c7cb3 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Mon, 10 Mar 2025 09:37:45 +0800
Subject: [PATCH 2/2] Add checks and LIT tests

Check that scc and dfv are the same if FlagI is a CCMP instruction, and
add more LIT tests.
---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 12 ++-
 .../CodeGen/X86/apx/optimize-compare-ccmp.mir | 83 +++++++++++++++++++
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 0b741338934b8..6f24ff9cd32d1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4958,13 +4958,17 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
   case X86::CCMP64ri32:
   case X86::CCMP32ri:
   case X86::CCMP16ri:
-  case X86::CCMP8ri:
+  case X86::CCMP8ri: {
     // The CCMP instruction should not be optimized if the scc/dfv in it is not
     // same as the one in previous CCMP instruction.
-    if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
-        (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
-      return false;
+    unsigned Opcode = FlagI.getOpcode();
+    if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri ||
+        Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri)
+      if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
+          (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+        return false;
     [[fallthrough]];
+  }
   case X86::CMP64ri32:
   case X86::CMP32ri:
   case X86::CMP16ri:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
index 1b5ecdfc40e8f..96752ceafe542 100644
--- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -56,6 +56,89 @@ body: |
     $bl = SETCCr 6, implicit $eflags
 ...
 ---
+name: opt_redundant_flags_adjusted_imm_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_2
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr64 = COPY $rsi
+    %1:gr64 = MOV64ri 1
+    ; CCMP+SETCC   %0 == 1
+    ; CCMP64rr will be optimized to CCMP64ri32 in the peephole optimization pass
+    CCMP64rr %0, %1, 2, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags
+    ; CCMP+SETCC   %0 >= 2; CCMP can be removed.
+    CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 >=s 2  -->  %0 >s 1
+    $bl = SETCCr 13, implicit $eflags
+    ; %0 >=u 2  -->  %0 >u 1
+    $bl = SETCCr 3, implicit $eflags
+    ; %0 <s 2  -->  %0 <=s 1
+    $bl = SETCCr 12, implicit $eflags
+    ; %0 <u 2  -->  %0 <=u 1
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_3
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CMP+SETCC   %0 == 1
+    CMP64ri32 %0, 1, implicit-def $eflags
+    $cl = SETCCr 4, implicit $eflags
+    ; CCMP+SETCC   %0 >= 2; CCMP can be removed.
+    CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+    ; %0 >=s 2  -->  %0 >s 1
+    $bl = SETCCr 13, implicit $eflags
+    ; %0 >=u 2  -->  %0 >u 1
+    $bl = SETCCr 3, implicit $eflags
+    ; %0 <s 2  -->  %0 <=s 1
+    $bl = SETCCr 12, implicit $eflags
+    ; %0 <u 2  -->  %0 <=u 1
+    $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_4
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_4
+    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+    ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+    ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+    %0:gr64 = COPY $rsi
+    ; CCMP+SETCC   %0 == 1
+    CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+    $cl = SETCCr 4, implicit $eflags
+    ; CMP+SETCC   %0 >= 2; CMP can be removed.
+    CMP64ri32 %0, 2, implicit-def $eflags
+    ; %0 >=s 2  -->  %0 >s 1
+    $bl = SETCCr 13, implicit $eflags
+    ; %0 >=u 2  -->  %0 >u 1
+    $bl = SETCCr 3, implicit $eflags
+    ; %0 <s 2  -->  %0 <=s 1
+    $bl = SETCCr 12, implicit $eflags
+    ; %0 <u 2  -->  %0 <=u 1
+    $bl = SETCCr 2, implicit $eflags
+...
+---
 name: opt_redundant_flags_adjusted_imm_noopt_0
 body: |
   bb.0:



More information about the llvm-commits mailing list