[llvm] [GlobalISel] Move G_FREEZE above G_ICMP to enable further possible co… (PR #83448)
Dávid Ferenc Szabó via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 09:11:01 PST 2024
https://github.com/dfszabo created https://github.com/llvm/llvm-project/pull/83448
…mbines
The fix https://github.com/llvm/llvm-project/pull/82733 introduced a G_FREEZE between the instructions produced by some combines. This causes inefficient code generation and a loss of combine opportunities, since most combines do not handle G_FREEZE. A solution is to move the G_FREEZE higher up in the basic block where possible, while being careful not to introduce additional G_FREEZE instructions.
This change only adds a pattern for G_ICMP for now, but the same approach can be applied to most other instructions as well.
>From 5fb917ad11a7d19aac01e2a9a8bb4e86ad4a27c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Ferenc=20Szab=C3=B3?=
<szabodavidferenc at gmail.com>
Date: Thu, 29 Feb 2024 17:23:31 +0100
Subject: [PATCH] [GlobalISel] Move G_FREEZE above G_ICMP to enable further
possible combines
The fix https://github.com/llvm/llvm-project/pull/82733 introduced a G_FREEZE between the instructions produced by some combines. This causes inefficient code generation and a loss of combine opportunities, since most combines do not handle G_FREEZE. A solution is to move the G_FREEZE higher up in the basic block where possible, while being careful not to introduce additional G_FREEZE instructions.
This change only adds a pattern for G_ICMP for now, but the same approach can be applied to most other instructions as well.
---
.../include/llvm/Target/GlobalISel/Combine.td | 14 ++++-
.../GlobalISel/combine-freeze-icmp.mir | 59 ++++++++++++++++++
llvm/test/CodeGen/AArch64/cmp-chains.ll | 60 +++++--------------
3 files changed, 88 insertions(+), 45 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 18db7a819540af..b72fb37db4306d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -952,6 +952,17 @@ def redundant_binop_in_equality : GICombineRule<
[{ return Helper.matchRedundantBinOpInEquality(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+// Hoist the freeze above the ICMP when a single freeze suffices (the second
+// operand is a constant or constant splat), so that other combines can match.
+def canonicalize_icmp_freeze_with_const: GICombineRule<
+ (defs root:$root),
+ (match (G_ICMP $dst, $p, $src1, $src2),
+ (G_FREEZE $root, $dst),
+ [{ return !!isConstantOrConstantSplatVector(*MRI.getVRegDef(${src2}.getReg()), MRI); }]),
+ (apply (G_FREEZE $new_src1, $src1),
+ (G_ICMP $root, $p, $new_src1, $src2))
+>;
+
// Transform: (X == 0 & Y == 0) -> (X | Y) == 0
def double_icmp_zero_and_combine: GICombineRule<
(defs root:$root),
@@ -1374,7 +1385,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine]>;
+ combine_concat_vector, double_icmp_zero_and_or_combine,
+ canonicalize_icmp_freeze_with_const]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir
new file mode 100644
index 00000000000000..1141059ad32766
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+
+
+---
+name: valid_freeze_icmp
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: valid_freeze_icmp
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s32) = COPY $w0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %x
+ ; CHECK-NEXT: %freeze:_(s1) = G_ICMP intpred(eq), [[FREEZE]](s32), %zero
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %freeze(s1)
+ ; CHECK-NEXT: $w0 = COPY %zext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %x:_(s32), %zero:_
+ %freeze:_(s1) = G_FREEZE %cmp
+ %zext:_(s32) = G_ZEXT %freeze:_(s1)
+ $w0 = COPY %zext
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: invalid_freeze_icmp_with_no_const_src
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: invalid_freeze_icmp_with_no_const_src
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s32) = COPY $w0
+ ; CHECK-NEXT: %y:_(s32) = COPY $w1
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %y
+ ; CHECK-NEXT: %freeze:_(s1) = G_FREEZE %cmp
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %freeze(s1)
+ ; CHECK-NEXT: $w0 = COPY %zext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %y:_(s32) = COPY $w1
+ %cmp:_(s1) = G_ICMP intpred(eq), %x:_(s32), %y:_
+ %freeze:_(s1) = G_FREEZE %cmp
+ %zext:_(s32) = G_ZEXT %freeze:_(s1)
+ $w0 = COPY %zext
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 1d9f39e5185939..4eaa25608d16ff 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -109,8 +109,7 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
; GISEL-NEXT: cset w8, lo
; GISEL-NEXT: cmp w2, w3
; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ne i32 %2, %3
@@ -138,8 +137,7 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; GISEL-NEXT: cmp w4, w5
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
@@ -173,8 +171,7 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%9 = icmp ult i32 %0, %1
%10 = icmp ugt i32 %2, %3
@@ -189,22 +186,12 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; (x0 != 0) || (x1 != 0)
define i32 @true_or2(i32 %0, i32 %1) {
-; SDISEL-LABEL: true_or2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: true_or2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-LABEL: true_or2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
%3 = icmp ne i32 %0, 0
%4 = icmp ne i32 %1, 0
%5 = select i1 %3, i1 true, i1 %4
@@ -214,26 +201,13 @@ define i32 @true_or2(i32 %0, i32 %1) {
; (x0 != 0) || (x1 != 0) || (x2 != 0)
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
-; SDISEL-LABEL: true_or3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: orr w8, w8, w2
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: true_or3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: cmp w2, #0
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-LABEL: true_or3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: orr w8, w8, w2
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
%4 = icmp ne i32 %0, 0
%5 = icmp ne i32 %1, 0
%6 = select i1 %4, i1 true, i1 %5
@@ -242,5 +216,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
%9 = zext i1 %8 to i32
ret i32 %9
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
More information about the llvm-commits
mailing list