[llvm] [GlobalISel][AArch64] Replace N bit G_ADD with N/2 bit G_ADD if the lower bits are known to be zero (PR #101327)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 08:14:19 PDT 2024


https://github.com/KRM7 updated https://github.com/llvm/llvm-project/pull/101327

>From ba6a80d23ee3b134addeadaf1124c763307539ce Mon Sep 17 00:00:00 2001
From: Krisztian Rugasi <Krisztian.Rugasi at hightec-rt.com>
Date: Tue, 2 Jul 2024 14:52:52 +0200
Subject: [PATCH] [GlobalISel][AArch64] Replace N bit G_ADD with N/2 bit G_ADD
 if the lower bits are known to be zero

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
 .../include/llvm/Target/GlobalISel/Combine.td |  10 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  56 +++++++++
 .../CodeGen/AArch64/GlobalISel/narrow-add.mir | 114 ++++++++++++++++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 05d7e882f5135..f1266ad063c0d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -714,6 +714,9 @@ class CombinerHelper {
   /// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
   bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  bool matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo);
+
   /// Transform (fadd x, fneg(y)) -> (fsub x, y)
   ///           (fadd fneg(x), y) -> (fsub y, x)
   ///           (fsub x, fneg(y)) -> (fadd x, y)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2246e20ecc1dc..b12a36e48f94b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1142,6 +1142,13 @@ def adde_to_addo: GICombineRule<
          [{ return Helper.matchAddEToAddO(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
 
+def narrow_add_to_half: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ADD):$root,
+    [{ return Helper.matchAddWithKnownZeroLowerHalfBits(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
 def mulh_to_lshr : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_UMULH):$root,
@@ -1829,7 +1836,8 @@ def known_bits_simplifications : GICombineGroup<[
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
-                                               narrow_binop_feeding_and]>;
+                                               narrow_binop_feeding_and,
+                                               narrow_add_to_half]>;
 
 def phi_combines : GICombineGroup<[extend_through_phis]>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d930ab2984629..c14f77b035d32 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5099,6 +5099,62 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
   return true;
 }
 
+bool CombinerHelper::matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
+                                                        BuildFnTy &MatchInfo) {
+  GAdd *Add = cast<GAdd>(&MI);
+
+  const Register DstReg = Add->getReg(0);
+  const LLT FullTy = MRI.getType(DstReg);
+
+  if (!FullTy.isScalar())
+    return false;
+
+  const uint64_t FullSize = FullTy.getSizeInBits();
+  const uint64_t HalfSize = (FullSize + 1) / 2;
+  const LLT HalfTy = LLT::scalar(HalfSize);
+
+  if (isLegal({TargetOpcode::G_ADD, {FullTy}}) ||
+      !isLegal({TargetOpcode::G_ADD, {HalfTy}}) ||
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_UNMERGE_VALUES, {HalfTy, FullTy}}) ||
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_MERGE_VALUES, {FullTy, HalfTy}}))
+    return false;
+
+  const Register LhsReg = Add->getLHSReg();
+  const Register RhsReg = Add->getRHSReg();
+
+  const KnownBits RhsKnownBits = KB->getKnownBits(RhsReg);
+  const KnownBits RhsLoBits = RhsKnownBits.extractBits(HalfSize, 0);
+  const bool RhsHasLoZeros =
+      RhsLoBits.isConstant() && RhsLoBits.getConstant().isZero();
+
+  if (!RhsHasLoZeros) {
+    const KnownBits LhsKnownBits = KB->getKnownBits(LhsReg);
+    const KnownBits LhsLoBits = LhsKnownBits.extractBits(HalfSize, 0);
+    if (!LhsLoBits.isConstant() || !LhsLoBits.getConstant().isZero())
+      return false;
+  }
+
+  const auto Flags = MI.getFlags();
+
+  MatchInfo = [=](MachineIRBuilder &MIRBuilder) {
+    const auto LhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, LhsReg);
+    const auto RhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, RhsReg);
+
+    const auto ResHiReg = MIRBuilder.buildAdd(HalfTy, LhsSubRegs.getReg(1),
+                                              RhsSubRegs.getReg(1), Flags);
+
+    if (RhsHasLoZeros) {
+      MIRBuilder.buildMergeLikeInstr(DstReg, {LhsSubRegs.getReg(0), ResHiReg});
+    } else {
+      MIRBuilder.buildMergeLikeInstr(DstReg, {RhsSubRegs.getReg(0), ResHiReg});
+    }
+  };
+
+  return true;
+}
+
 bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
                                         BuildFnTy &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SUB);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
new file mode 100644
index 0000000000000..4ae86d2a09e33
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            add_s128_unknown_bits
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_unknown_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %lhs:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name:            add_s64_low32_known_zero_bits
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: add_s64_low32_known_zero_bits
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = COPY $x1
+    ; CHECK-NEXT: %mask:_(s64) = G_CONSTANT i64 -4294967296
+    ; CHECK-NEXT: %lhs:_(s64) = G_AND %a, %mask
+    ; CHECK-NEXT: %res:_(s64) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $x0 = COPY %res(s64)
+    %a:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x1
+    %mask:_(s64) = G_CONSTANT i64 -4294967296
+    %lhs:_(s64) = G_AND %a, %mask
+    %res:_(s64) = G_ADD %lhs, %rhs
+    $x0 = COPY %res(s64)
+...
+
+---
+name:            add_s128_low64_known_nonzero_bits
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_low64_known_nonzero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+    ; CHECK-NEXT: %lhs:_(s128) = G_OR %a, %mask
+    ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %a:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+    %lhs:_(s128) = G_OR %a, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name:            add_s128_lhs_low64_known_zero_bits
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_lhs_low64_known_zero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    ; CHECK-NEXT: %lhs:_(s128) = G_AND %a, %mask
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+    ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV2]](s64), [[ADD]](s64)
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %a:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    %lhs:_(s128) = G_AND %a, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name:            add_s128_rhs_low64_known_zero_bits
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_rhs_low64_known_zero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+    ; CHECK-NEXT: %b:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    ; CHECK-NEXT: %rhs:_(s128) = G_AND %b, %mask
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+    ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV]](s64), [[ADD]](s64)
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %lhs:_(s128) = COPY $q0
+    %b:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    %rhs:_(s128) = G_AND %b, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...



More information about the llvm-commits mailing list