[llvm] [X86] Support peephole optimization with CCMP instruction (PR #129994)
Feng Zou via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 08:48:32 PDT 2025
https://github.com/fzou1 updated https://github.com/llvm/llvm-project/pull/129994
>From dcc1099af82e9b6d328e9690b5b6492edaa0356a Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Thu, 19 Dec 2024 17:45:38 +0800
Subject: [PATCH 1/4] [X86] Support peephole optimization with CCMP instruction
This extends `optimizeCompareInstr` to re-use previous CCMP results if the
previous comparison was with an immediate that was 1 bigger or smaller.
Example:
```
CCMP x, 13, 2, 5
...
CCMP x, 12, 2, 5 ; can be removed if we change the SETg
SETg ... ; x > 12 changed to SETge (x >= 13) & remove the 2nd CCMP
```
---
.../Target/X86/X86InstrConditionalCompare.td | 4 +-
llvm/lib/Target/X86/X86InstrInfo.cpp | 14 +
.../CodeGen/X86/apx/optimize-compare-ccmp.mir | 312 ++++++++++++++++++
3 files changed, 328 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index 35af8405f1abe..ba8cf6cc3bc67 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -36,7 +36,7 @@ class Ctest<bits<8> o, Format f, X86TypeInfo t, DAGOperand op1, DAGOperand op2>:
//===----------------------------------------------------------------------===//
// CCMP Instructions
//
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU], isCompare = 1 in {
def CCMP8rr : Ccmp<0x38, MRMDestReg, Xi8, GR8, GR8>;
def CCMP16rr: Ccmp<0x39, MRMDestReg, Xi16, GR16, GR16>, PD;
def CCMP32rr: Ccmp<0x39, MRMDestReg, Xi32, GR32, GR32>;
@@ -55,7 +55,7 @@ let SchedRW = [WriteALU] in {
def CCMP64ri32: Ccmp<0x81, MRM7r, Xi64, GR64, i64i32imm>;
}
-let mayLoad = 1 in {
+let mayLoad = 1, isCompare = 1 in {
let SchedRW = [WriteALU.Folded] in {
def CCMP16mi8: Ccmp<0x83, MRM7m, Xi16, i16mem, i16i8imm>, PD;
def CCMP32mi8: Ccmp<0x83, MRM7m, Xi32, i32mem, i32i8imm>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5fe7203c052d8..0b741338934b8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4854,6 +4854,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
case X86::CMP32ri:
case X86::CMP16ri:
case X86::CMP8ri:
+ case X86::CCMP64ri32:
+ case X86::CCMP32ri:
+ case X86::CCMP16ri:
+ case X86::CCMP8ri:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
if (MI.getOperand(1).isImm()) {
@@ -4951,6 +4955,16 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
}
return false;
}
+ case X86::CCMP64ri32:
+ case X86::CCMP32ri:
+ case X86::CCMP16ri:
+ case X86::CCMP8ri:
+ // The CCMP instruction should not be optimized if the scc/dfv in it is not
+ // same as the one in previous CCMP instruction.
+ if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
+ (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+ return false;
+ [[fallthrough]];
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
new file mode 100644
index 0000000000000..1b5ecdfc40e8f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -0,0 +1,312 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s
+
+---
+name: opt_redundant_flags_adjusted_imm_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP can be removed.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 != 42
+ CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 5, implicit $eflags
+ ; CCMP+SETCC %0 > 41; CCMP can be removed.
+ CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >s 41 --> %0 >=s 42
+ $bl = SETCCr 15, implicit $eflags
+ ; %0 >u 41 --> %0 >=u 42
+ $bl = SETCCr 7, implicit $eflags
+ ; %0 <=s 41 --> %0 <s 42
+ $bl = SETCCr 14, implicit $eflags
+ ; %0 <=u 41 --> %0 <u 42
+ $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 42, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 42
+ CCMP64ri32 %0, 42, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP64ri32 %0, 41, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 == 41
+ $bl = SETCCr 4, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_1
+ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ %0:gr32 = COPY $esi
+ ; CCMP+SETCC %0 == INT32_MAX
+ CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 <s INT32_MIN
+ $bl = SETCCr 12, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+
+ CCMP32ri %0, 2147483647, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, -2147483648, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+
+ CCMP32ri %0, 4294967295, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP32ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_2
+ ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $cx
+ ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP16ri [[COPY]], 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr16 = COPY $cx
+ ; CCMP+SETCC %0 == INT16_MIN
+ CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >s INT16_MAX
+ $bl = SETCCr 15, implicit $eflags
+
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 15, implicit $eflags
+
+ CCMP16ri %0, -32768, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 32767, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 14, implicit $eflags
+
+ CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 4, implicit $eflags
+
+ CCMP16ri %0, 0, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CCMP should not be removed.
+ CCMP16ri %0, 65535, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 6, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_3
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 7, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 7, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP should not be removed as its scc or dfv
+ ; differs from the previous CCMP.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_noopt_4
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_4
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 5, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 5, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP should not be removed as its scc or dfv
+ ; differs from the previous CCMP.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ $bl = SETCCr 13, implicit $eflags
+ $bl = SETCCr 3, implicit $eflags
+ $bl = SETCCr 12, implicit $eflags
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_adjusted_imm_multiple_blocks
+body: |
+ ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ %0:gr32 = COPY $eax
+ CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.1:
+ CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.2, 13, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.2:
+ JMP_1 %bb.3
+
+ bb.3:
+ RET 0
+...
+---
+name: opt_adjusted_imm_multiple_blocks_noopt
+body: |
+ ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 20, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CCMP32ri [[COPY]], 21, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.2, 13, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ %0:gr32 = COPY $eax
+ CCMP32ri %0, 20, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.1:
+ CCMP32ri %0, 21, 2, 5, implicit-def $eflags, implicit $eflags
+ JCC_1 %bb.2, 13, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.2:
+ liveins: $eflags
+ $al = SETCCr 4, implicit $eflags
+
+ bb.3:
+ RET 0
+...
>From 3511b19e20303e860cc8a2cdff7b21330e1c7cb3 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Mon, 10 Mar 2025 09:37:45 +0800
Subject: [PATCH 2/4] Add checks and LIT tests
Check that scc and dfv are the same if FlagI is a CCMP instruction, and add
more LIT tests.
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 12 ++-
.../CodeGen/X86/apx/optimize-compare-ccmp.mir | 83 +++++++++++++++++++
2 files changed, 91 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 0b741338934b8..6f24ff9cd32d1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4958,13 +4958,17 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
case X86::CCMP64ri32:
case X86::CCMP32ri:
case X86::CCMP16ri:
- case X86::CCMP8ri:
+ case X86::CCMP8ri: {
// The CCMP instruction should not be optimized if the scc/dfv in it is not
// same as the one in previous CCMP instruction.
- if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
- (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
- return false;
+ unsigned Opcode = FlagI.getOpcode();
+ if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri ||
+ Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri)
+ if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
+ (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+ return false;
[[fallthrough]];
+ }
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
index 1b5ecdfc40e8f..96752ceafe542 100644
--- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -56,6 +56,89 @@ body: |
$bl = SETCCr 6, implicit $eflags
...
---
+name: opt_redundant_flags_adjusted_imm_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_2
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr64 = COPY $rsi
+ %1:gr64 = MOV64ri 1
+ ; CCMP+SETCC %0 == 1
+ ; CCMP64rr will be optimized to CCMP64ri32 in the peephole optimization pass
+ CCMP64rr %0, %1, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP can be removed.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_3
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CMP+SETCC %0 == 1
+ CMP64ri32 %0, 1, implicit-def $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CCMP+SETCC %0 >= 2; CCMP can be removed.
+ CCMP64ri32 %0, 2, 2, 5, implicit-def $eflags, implicit $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
+name: opt_redundant_flags_adjusted_imm_4
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_4
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CMP+SETCC %0 >= 2; CMP can be removed.
+ CMP64ri32 %0, 2, implicit-def $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
name: opt_redundant_flags_adjusted_imm_noopt_0
body: |
bb.0:
>From ce9e3abadee22716d7b31dc601963e22b7c9b0ab Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Mon, 10 Mar 2025 23:27:27 +0800
Subject: [PATCH 3/4] Add check to ensure CCMP opcode is same
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 10 ++--
.../CodeGen/X86/apx/optimize-compare-ccmp.mir | 55 ++++++++++---------
2 files changed, 32 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6f24ff9cd32d1..885ebaa79535f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4961,12 +4961,10 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
case X86::CCMP8ri: {
// The CCMP instruction should not be optimized if the scc/dfv in it is not
// same as the one in previous CCMP instruction.
- unsigned Opcode = FlagI.getOpcode();
- if (Opcode == X86::CCMP64ri32 || Opcode == X86::CCMP32ri ||
- Opcode == X86::CCMP16ri || Opcode == X86::CCMP8ri)
- if (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm() ||
- (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
- return false;
+ if ((FlagI.getOpcode() != OI.getOpcode()) ||
+ (OI.getOperand(2).getImm() != FlagI.getOperand(2).getImm()) ||
+ (OI.getOperand(3).getImm() != FlagI.getOperand(3).getImm()))
+ return false;
[[fallthrough]];
}
case X86::CMP64ri32:
diff --git a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
index 96752ceafe542..ae6741c5ee882 100644
--- a/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
+++ b/llvm/test/CodeGen/X86/apx/optimize-compare-ccmp.mir
@@ -112,33 +112,6 @@ body: |
$bl = SETCCr 2, implicit $eflags
...
---
-name: opt_redundant_flags_adjusted_imm_4
-body: |
- bb.0:
- ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_4
- ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
- ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
- ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
- ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags
- ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
- ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags
- ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags
- %0:gr64 = COPY $rsi
- ; CCMP+SETCC %0 == 1
- CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
- $cl = SETCCr 4, implicit $eflags
- ; CMP+SETCC %0 >= 2; CMP can be removed.
- CMP64ri32 %0, 2, implicit-def $eflags
- ; %0 >=s 2 --> %0 >s 1
- $bl = SETCCr 13, implicit $eflags
- ; %0 >=u 2 --> %0 >u 1
- $bl = SETCCr 3, implicit $eflags
- ; %0 <s 2 --> %0 <=s 1
- $bl = SETCCr 12, implicit $eflags
- ; %0 <u 2 --> %0 <=u 1
- $bl = SETCCr 2, implicit $eflags
-...
----
name: opt_redundant_flags_adjusted_imm_noopt_0
body: |
bb.0:
@@ -306,6 +279,34 @@ body: |
$bl = SETCCr 2, implicit $eflags
...
---
+name: opt_redundant_flags_adjusted_imm_noopt_5
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_5
+ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
+ ; CHECK-NEXT: CCMP64ri32 [[COPY]], 1, 2, 5, implicit-def $eflags, implicit $eflags
+ ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: CMP64ri32 [[COPY]], 2, implicit-def $eflags
+ ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags
+ ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
+ %0:gr64 = COPY $rsi
+ ; CCMP+SETCC %0 == 1
+ CCMP64ri32 %0, 1, 2, 5, implicit-def $eflags, implicit $eflags
+ $cl = SETCCr 4, implicit $eflags
+ ; CMP+SETCC %0 >= 2; CMP cannot be removed.
+ CMP64ri32 %0, 2, implicit-def $eflags
+ ; %0 >=s 2 --> %0 >s 1
+ $bl = SETCCr 13, implicit $eflags
+ ; %0 >=u 2 --> %0 >u 1
+ $bl = SETCCr 3, implicit $eflags
+ ; %0 <s 2 --> %0 <=s 1
+ $bl = SETCCr 12, implicit $eflags
+ ; %0 <u 2 --> %0 <=u 1
+ $bl = SETCCr 2, implicit $eflags
+...
+---
name: opt_adjusted_imm_multiple_blocks
body: |
; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks
>From df7cb157c6c76df36d47e937c23c9b7b209cf3ba Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Mon, 10 Mar 2025 23:47:24 +0800
Subject: [PATCH 4/4] Add two sub-tests in ccmp LIT test to validate peephole
optimization.
---
llvm/test/CodeGen/X86/apx/ccmp.ll | 70 +++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll
index 7bd8aeea8863b..e2e4e8df93149 100644
--- a/llvm/test/CodeGen/X86/apx/ccmp.ll
+++ b/llvm/test/CodeGen/X86/apx/ccmp.ll
@@ -1300,5 +1300,75 @@ if.end: ; preds = %entry, %if.then
ret void
}
+define void @ccmp_continous_adjust_imm(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ccmp_continous_adjust_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
+; CHECK-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
+; CHECK-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
+; CHECK-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: # encoding: [0xeb,A]
+; CHECK-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT: .LBB31_1: # %if.end
+; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp_continous_adjust_imm:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl $2, %edi # encoding: [0x83,0xff,0x02]
+; NDD-NEXT: ccmpll {dfv=} $2, %esi # encoding: [0x62,0xf4,0x04,0x0c,0x83,0xfe,0x02]
+; NDD-NEXT: jg .LBB31_1 # encoding: [0x7f,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB31_1-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: # encoding: [0xeb,A]
+; NDD-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT: .LBB31_1: # %if.end
+; NDD-NEXT: retq # encoding: [0xc3]
+entry:
+ %cmp = icmp slt i32 %a, 2
+ %cmp1 = icmp slt i32 %b, 2
+ %or.cond = and i1 %cmp, %cmp1
+ %cmp3 = icmp slt i32 %b, 3
+ %or.cond4 = and i1 %or.cond, %cmp3
+ br i1 %or.cond4, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define i32 @ccmp_continous_nobranch_adjust_imm(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ccmp_continous_nobranch_adjust_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
+; CHECK-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
+; CHECK-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp_continous_nobranch_adjust_imm:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl $2, %esi # encoding: [0x83,0xfe,0x02]
+; NDD-NEXT: ccmpgl {dfv=} $2, %edi # encoding: [0x62,0xf4,0x04,0x0f,0x83,0xff,0x02]
+; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; NDD-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+entry:
+ %cmp = icmp sgt i32 %a, 1
+ %cmp1 = icmp slt i32 %b, 2
+ %cmp2 = icmp slt i32 %b, 3
+ %or1 = or i1 %cmp, %cmp1
+ %or2 = or i1 %or1, %cmp2
+ %. = zext i1 %or2 to i32
+ ret i32 %.
+}
+
declare dso_local void @foo(...)
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
More information about the llvm-commits
mailing list