[llvm] [GlobalISel][AArch64] Replace an N-bit G_ADD with an N/2-bit G_ADD if the low N/2 bits of one operand are known to be zero (PR #101327)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 1 08:14:19 PDT 2024
https://github.com/KRM7 updated https://github.com/llvm/llvm-project/pull/101327
From ba6a80d23ee3b134addeadaf1124c763307539ce Mon Sep 17 00:00:00 2001
From: Krisztian Rugasi <Krisztian.Rugasi at hightec-rt.com>
Date: Tue, 2 Jul 2024 14:52:52 +0200
Subject: [PATCH] [GlobalISel][AArch64] Replace an N-bit G_ADD with an N/2-bit
 G_ADD if the low N/2 bits of one operand are known to be zero
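
The combine applies when the low N/2 bits of either addend are known (via
known-bits analysis) to be all zeros, so no carry can propagate from the low
half into the high half. It only fires when the full-width G_ADD is not legal
but the half-width one is. A rough sketch of the intended transform, using the
s128 case from the included MIR tests (register names here are illustrative,
assuming the RHS has its low 64 bits known to be zero):

  %res:_(s128) = G_ADD %lhs, %rhs
    -->
  %lhs_lo:_(s64), %lhs_hi:_(s64) = G_UNMERGE_VALUES %lhs(s128)
  %rhs_lo:_(s64), %rhs_hi:_(s64) = G_UNMERGE_VALUES %rhs(s128)
  %sum_hi:_(s64) = G_ADD %lhs_hi, %rhs_hi
  %res:_(s128) = G_MERGE_VALUES %lhs_lo(s64), %sum_hi(s64)

The low half of the result is simply the low half of the other operand, and
only the high halves need an N/2-bit add.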
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
.../include/llvm/Target/GlobalISel/Combine.td | 10 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 56 +++++++++
.../CodeGen/AArch64/GlobalISel/narrow-add.mir | 114 ++++++++++++++++++
4 files changed, 182 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 05d7e882f5135..f1266ad063c0d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -714,6 +714,9 @@ class CombinerHelper {
/// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo);
+
/// Transform (fadd x, fneg(y)) -> (fsub x, y)
/// (fadd fneg(x), y) -> (fsub y, x)
/// (fsub x, fneg(y)) -> (fadd x, y)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2246e20ecc1dc..b12a36e48f94b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1142,6 +1142,13 @@ def adde_to_addo: GICombineRule<
[{ return Helper.matchAddEToAddO(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+def narrow_add_to_half: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ADD):$root,
+         [{ return Helper.matchAddWithKnownZeroLowerHalfBits(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
def mulh_to_lshr : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_UMULH):$root,
@@ -1829,7 +1836,8 @@ def known_bits_simplifications : GICombineGroup<[
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
-                                               narrow_binop_feeding_and]>;
+                                               narrow_binop_feeding_and,
+                                               narrow_add_to_half]>;
def phi_combines : GICombineGroup<[extend_through_phis]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d930ab2984629..c14f77b035d32 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5099,6 +5099,62 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+bool CombinerHelper::matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
+                                                        BuildFnTy &MatchInfo) {
+  GAdd *Add = cast<GAdd>(&MI);
+
+  const Register DstReg = Add->getReg(0);
+  const LLT FullTy = MRI.getType(DstReg);
+
+  if (!FullTy.isScalar())
+    return false;
+
+  const uint64_t FullSize = FullTy.getSizeInBits();
+  const uint64_t HalfSize = (FullSize + 1) / 2;
+  const LLT HalfTy = LLT::scalar(HalfSize);
+
+  if (isLegal({TargetOpcode::G_ADD, {FullTy}}) ||
+      !isLegal({TargetOpcode::G_ADD, {HalfTy}}) ||
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_UNMERGE_VALUES, {HalfTy, FullTy}}) ||
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_MERGE_VALUES, {FullTy, HalfTy}}))
+    return false;
+
+  const Register LhsReg = Add->getLHSReg();
+  const Register RhsReg = Add->getRHSReg();
+
+  const KnownBits RhsKnownBits = KB->getKnownBits(RhsReg);
+  const KnownBits RhsLoBits = RhsKnownBits.extractBits(HalfSize, 0);
+  const bool RhsHasLoZeros =
+      RhsLoBits.isConstant() && RhsLoBits.getConstant().isZero();
+
+  if (!RhsHasLoZeros) {
+    const KnownBits LhsKnownBits = KB->getKnownBits(LhsReg);
+    const KnownBits LhsLoBits = LhsKnownBits.extractBits(HalfSize, 0);
+    if (!LhsLoBits.isConstant() || !LhsLoBits.getConstant().isZero())
+      return false;
+  }
+
+  const auto Flags = MI.getFlags();
+
+  MatchInfo = [=](MachineIRBuilder &MIRBuilder) {
+    const auto LhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, LhsReg);
+    const auto RhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, RhsReg);
+
+    const auto ResHiReg = MIRBuilder.buildAdd(HalfTy, LhsSubRegs.getReg(1),
+                                              RhsSubRegs.getReg(1), Flags);
+
+    if (RhsHasLoZeros) {
+      MIRBuilder.buildMergeLikeInstr(DstReg, {LhsSubRegs.getReg(0), ResHiReg});
+    } else {
+      MIRBuilder.buildMergeLikeInstr(DstReg, {RhsSubRegs.getReg(0), ResHiReg});
+    }
+  };
+
+  return true;
+}
+
bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SUB);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
new file mode 100644
index 0000000000000..4ae86d2a09e33
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: add_s128_unknown_bits
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_unknown_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %lhs:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name: add_s64_low32_known_zero_bits
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: add_s64_low32_known_zero_bits
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %a:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = COPY $x1
+    ; CHECK-NEXT: %mask:_(s64) = G_CONSTANT i64 -4294967296
+    ; CHECK-NEXT: %lhs:_(s64) = G_AND %a, %mask
+    ; CHECK-NEXT: %res:_(s64) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $x0 = COPY %res(s64)
+    %a:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x1
+    %mask:_(s64) = G_CONSTANT i64 -4294967296
+    %lhs:_(s64) = G_AND %a, %mask
+    %res:_(s64) = G_ADD %lhs, %rhs
+    $x0 = COPY %res(s64)
+...
+
+---
+name: add_s128_low64_known_nonzero_bits
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_low64_known_nonzero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %a:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+    ; CHECK-NEXT: %lhs:_(s128) = G_OR %a, %mask
+    ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %a:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+    %lhs:_(s128) = G_OR %a, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name: add_s128_lhs_low64_known_zero_bits
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_lhs_low64_known_zero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %a:_(s128) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    ; CHECK-NEXT: %lhs:_(s128) = G_AND %a, %mask
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+    ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV2]](s64), [[ADD]](s64)
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %a:_(s128) = COPY $q0
+    %rhs:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    %lhs:_(s128) = G_AND %a, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...
+
+---
+name: add_s128_rhs_low64_known_zero_bits
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_s128_rhs_low64_known_zero_bits
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+    ; CHECK-NEXT: %b:_(s128) = COPY $q1
+    ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    ; CHECK-NEXT: %rhs:_(s128) = G_AND %b, %mask
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+    ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV]](s64), [[ADD]](s64)
+    ; CHECK-NEXT: $q0 = COPY %res(s128)
+    %lhs:_(s128) = COPY $q0
+    %b:_(s128) = COPY $q1
+    %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+    %rhs:_(s128) = G_AND %b, %mask
+    %res:_(s128) = G_ADD %lhs, %rhs
+    $q0 = COPY %res(s128)
+...