[llvm] [GlobalISel] Move G_FREEZE above G_ICMP to enable further possible co… (PR #83448)

Thu Feb 29 09:11:01 PST 2024

https://github.com/dfszabo created https://github.com/llvm/llvm-project/pull/83448

…mbines

The fix https://github.com/llvm/llvm-project/pull/82733 introduced a G_FREEZE in between the instructions produced by some combines. This causes inefficient code generation and lost combine opportunities, since most combines do not handle G_FREEZE. One solution is to move the G_FREEZE further up in the basic block where possible, while being careful not to introduce additional G_FREEZE instructions.

This change only adds a pattern for G_ICMP for now, but the same approach can be applied to most other instructions as well.

>From 5fb917ad11a7d19aac01e2a9a8bb4e86ad4a27c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Ferenc=20Szab=C3=B3?=
 <szabodavidferenc at gmail.com>
Date: Thu, 29 Feb 2024 17:23:31 +0100
Subject: [PATCH] [GlobalISel] Move G_FREEZE above G_ICMP to enable further
 possible combines

The fix https://github.com/llvm/llvm-project/pull/82733 introduced a G_FREEZE in between the instructions produced by some combines. This causes inefficient code generation and lost combine opportunities, since most combines do not handle G_FREEZE. One solution is to move the G_FREEZE further up in the basic block where possible, while being careful not to introduce additional G_FREEZE instructions.

This change only adds a pattern for G_ICMP for now, but the same approach can be applied to most other instructions as well.
---
 .../include/llvm/Target/GlobalISel/Combine.td | 14 ++++-
 .../GlobalISel/combine-freeze-icmp.mir        | 59 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/cmp-chains.ll       | 60 +++++--------------
 3 files changed, 88 insertions(+), 45 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 18db7a819540af..b72fb37db4306d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -952,6 +952,17 @@ def redundant_binop_in_equality : GICombineRule<
          [{ return Helper.matchRedundantBinOpInEquality(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+// Hoist a G_FREEZE above its G_ICMP when the compare's RHS is a constant, so
+// only the LHS needs freezing; this lets other combines match the G_ICMP.
+def canonicalize_icmp_freeze_with_const: GICombineRule<
+  (defs root:$root),
+  (match (G_ICMP $dst, $p, $src1, $src2),
+         (G_FREEZE $root, $dst),
+         [{ return !!isConstantOrConstantSplatVector(*MRI.getVRegDef(${src2}.getReg()), MRI); }]),
+  (apply (G_FREEZE $new_src1, $src1),
+         (G_ICMP $root, $p, $new_src1, $src2))
+>;
+
 // Transform: (X == 0 & Y == 0) -> (X | Y) == 0
 def double_icmp_zero_and_combine: GICombineRule<
   (defs root:$root),
@@ -1374,7 +1385,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, 
-    combine_concat_vector, double_icmp_zero_and_or_combine]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine,
+    canonicalize_icmp_freeze_with_const]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir
new file mode 100644
index 00000000000000..1141059ad32766
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze-icmp.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+
+
+---
+name:            valid_freeze_icmp
+tracksRegLiveness: true
+legalized: true
+body:             |
+  bb.0:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: valid_freeze_icmp
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %x
+    ; CHECK-NEXT: %freeze:_(s1) = G_ICMP intpred(eq), [[FREEZE]](s32), %zero
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %freeze(s1)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(eq), %x:_(s32), %zero:_
+    %freeze:_(s1) = G_FREEZE %cmp
+    %zext:_(s32) = G_ZEXT %freeze:_(s1)
+    $w0 = COPY %zext
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            invalid_freeze_icmp_with_no_const_src
+tracksRegLiveness: true
+legalized: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: invalid_freeze_icmp_with_no_const_src
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: %y:_(s32) = COPY $w1
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %x(s32), %y
+    ; CHECK-NEXT: %freeze:_(s1) = G_FREEZE %cmp
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %freeze(s1)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %cmp:_(s1) = G_ICMP intpred(eq), %x:_(s32), %y:_
+    %freeze:_(s1) = G_FREEZE %cmp
+    %zext:_(s32) = G_ZEXT %freeze:_(s1)
+    $w0 = COPY %zext
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 1d9f39e5185939..4eaa25608d16ff 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -109,8 +109,7 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
 ; GISEL-NEXT:    cset w8, lo
 ; GISEL-NEXT:    cmp w2, w3
 ; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ne i32 %2, %3
@@ -138,8 +137,7 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 ; GISEL-NEXT:    cmp w4, w5
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
@@ -173,8 +171,7 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    cset w11, eq
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %9 = icmp ult i32 %0, %1
   %10 = icmp ugt i32 %2, %3
@@ -189,22 +186,12 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 
 ; (x0 != 0) || (x1 != 0)
 define i32 @true_or2(i32 %0, i32 %1) {
-; SDISEL-LABEL: true_or2:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    orr w8, w0, w1
-; SDISEL-NEXT:    cmp w8, #0
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: true_or2:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: true_or2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %3 = icmp ne i32 %0, 0
   %4 = icmp ne i32 %1, 0
   %5 = select i1 %3, i1 true, i1 %4
@@ -214,26 +201,13 @@ define i32 @true_or2(i32 %0, i32 %1) {
 
 ; (x0 != 0) || (x1 != 0) || (x2 != 0)
 define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
-; SDISEL-LABEL: true_or3:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    orr w8, w0, w1
-; SDISEL-NEXT:    orr w8, w8, w2
-; SDISEL-NEXT:    cmp w8, #0
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: true_or3:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    cmp w2, #0
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: true_or3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    orr w8, w8, w2
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %4 = icmp ne i32 %0, 0
   %5 = icmp ne i32 %1, 0
   %6 = select i1 %4, i1 true, i1 %5
@@ -242,5 +216,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
   %9 = zext i1 %8 to i32
   ret i32 %9
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}