[llvm] [GlobalISel][AArch64] Replace N-bit G_ADD with N/2-bit G_ADD if the lower bits are known to be zero (PR #101327)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 05:43:44 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: None (KRM7)
---
Full diff: https://github.com/llvm/llvm-project/pull/101327.diff
4 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+2)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+63)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir (+114)
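
The combine rests on a carry-free split: if the low half of one addend is all zeros, the low halves add without a carry out, so the result's low half is just the other addend's low half and only the high halves need a (narrow) add. A minimal standalone sketch of that identity, shown here on 64/32 bits with plain C++ integers rather than the GlobalISel API (all names are illustrative):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative helpers: split a 64-bit value into 32-bit halves.
static uint32_t lo32(uint64_t V) { return static_cast<uint32_t>(V); }
static uint32_t hi32(uint64_t V) { return static_cast<uint32_t>(V >> 32); }

int main() {
  // Lhs has known-zero low bits, e.g. after masking with
  // 0xFFFFFFFF00000000 (the shape of the MIR tests below).
  const uint64_t Lhs = UINT64_C(0x1234567800000000);
  const uint64_t Rhs = UINT64_C(0xDEADBEEFCAFEF00D);

  // Since lo32(Lhs) == 0, no carry can leave the low half.
  const uint32_t ResLo = lo32(Rhs);
  const uint32_t ResHi = hi32(Lhs) + hi32(Rhs); // wraps, like G_ADD
  const uint64_t Narrow = (static_cast<uint64_t>(ResHi) << 32) | ResLo;

  assert(Lhs + Rhs == Narrow); // wide add and narrowed form agree
  return 0;
}
```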
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 05d7e882f5135..f4abdc2dc22ea 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -714,6 +714,8 @@ class CombinerHelper {
/// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Transform (fadd x, fneg(y)) -> (fsub x, y)
/// (fadd fneg(x), y) -> (fsub y, x)
/// (fsub x, fneg(y)) -> (fadd x, y)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2246e20ecc1dc..b12a36e48f94b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1142,6 +1142,13 @@ def adde_to_addo: GICombineRule<
[{ return Helper.matchAddEToAddO(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+def narrow_add_to_half: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_ADD):$root,
+ [{ return Helper.matchAddWithKnownZeroLowerHalfBits(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
def mulh_to_lshr : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_UMULH):$root,
@@ -1829,7 +1836,8 @@ def known_bits_simplifications : GICombineGroup<[
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
- narrow_binop_feeding_and]>;
+ narrow_binop_feeding_and,
+ narrow_add_to_half]>;
def phi_combines : GICombineGroup<[extend_through_phis]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d930ab2984629..afa202a0a8e4c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5099,6 +5099,69 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+bool CombinerHelper::matchAddWithKnownZeroLowerHalfBits(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+
+ if (!DstTy.isScalar()) {
+ return false;
+ }
+
+ const std::uint64_t FullSize = DstTy.getSizeInBits();
+ const std::uint64_t HalfSize = (FullSize + 1) / 2;
+
+ MachineFunction &MF = *MI.getMF();
+ const DataLayout &DL = MF.getDataLayout();
+
+ if (DL.isLegalInteger(FullSize) || !DL.isLegalInteger(HalfSize)) {
+ return false;
+ }
+
+ const Register LhsReg = MI.getOperand(1).getReg();
+ const Register RhsReg = MI.getOperand(2).getReg();
+
+ const KnownBits LhsKnownBits = KB->getKnownBits(LhsReg);
+ const KnownBits LhsLoBits = LhsKnownBits.extractBits(HalfSize, 0);
+
+ const KnownBits RhsKnownBits = KB->getKnownBits(RhsReg);
+ const KnownBits RhsLoBits = RhsKnownBits.extractBits(HalfSize, 0);
+
+ const bool LhsHasLoZeros =
+ LhsLoBits.isConstant() && LhsLoBits.getConstant().isZero();
+ const bool RhsHasLoZeros =
+ RhsLoBits.isConstant() && RhsLoBits.getConstant().isZero();
+
+ if (!LhsHasLoZeros && !RhsHasLoZeros) {
+ return false;
+ }
+
+ const auto Flags = MI.getFlags();
+
+ MatchInfo = [=](MachineIRBuilder &MIRBuilder) {
+ const LLT HalfTy = LLT::scalar(HalfSize);
+
+ const auto LhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, LhsReg);
+ const auto RhsSubRegs = MIRBuilder.buildUnmerge(HalfTy, RhsReg);
+
+ const Register ResHiReg = MRI.createGenericVirtualRegister(HalfTy);
+
+ MIRBuilder.buildAdd(ResHiReg, LhsSubRegs.getReg(1), RhsSubRegs.getReg(1),
+ Flags);
+
+ if (LhsHasLoZeros) {
+ MIRBuilder.buildMergeLikeInstr(DstReg, {RhsSubRegs.getReg(0), ResHiReg});
+ } else {
+ assert(RhsHasLoZeros);
+ MIRBuilder.buildMergeLikeInstr(DstReg, {LhsSubRegs.getReg(0), ResHiReg});
+ }
+ };
+
+ return true;
+}
+
bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SUB);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
new file mode 100644
index 0000000000000..10701da868bb3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/narrow-add.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -global-isel -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: add_s128_unknown_bits
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: add_s128_unknown_bits
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+ ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+ ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %res(s128)
+ %lhs:_(s128) = COPY $q0
+ %rhs:_(s128) = COPY $q1
+ %res:_(s128) = G_ADD %lhs, %rhs
+ $q0 = COPY %res(s128)
+...
+
+---
+name: add_s64_low32_known_zero_bits
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: add_s64_low32_known_zero_bits
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = COPY $x1
+ ; CHECK-NEXT: %mask:_(s64) = G_CONSTANT i64 -4294967296
+ ; CHECK-NEXT: %lhs:_(s64) = G_AND %a, %mask
+ ; CHECK-NEXT: %res:_(s64) = G_ADD %lhs, %rhs
+ ; CHECK-NEXT: $x0 = COPY %res(s64)
+ %a:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x1
+ %mask:_(s64) = G_CONSTANT i64 -4294967296
+ %lhs:_(s64) = G_AND %a, %mask
+ %res:_(s64) = G_ADD %lhs, %rhs
+ $x0 = COPY %res(s64)
+...
+
+---
+name: add_s128_low64_known_nonzero_bits
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: add_s128_low64_known_nonzero_bits
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s128) = COPY $q0
+ ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+ ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+ ; CHECK-NEXT: %lhs:_(s128) = G_OR %a, %mask
+ ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %res(s128)
+ %a:_(s128) = COPY $q0
+ %rhs:_(s128) = COPY $q1
+ %mask:_(s128) = G_CONSTANT i128 18446744073709551615
+ %lhs:_(s128) = G_OR %a, %mask
+ %res:_(s128) = G_ADD %lhs, %rhs
+ $q0 = COPY %res(s128)
+...
+
+---
+name: add_s128_lhs_low64_known_zero_bits
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: add_s128_lhs_low64_known_zero_bits
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s128) = COPY $q0
+ ; CHECK-NEXT: %rhs:_(s128) = COPY $q1
+ ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+ ; CHECK-NEXT: %lhs:_(s128) = G_AND %a, %mask
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+ ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV2]](s64), [[ADD]](s64)
+ ; CHECK-NEXT: $q0 = COPY %res(s128)
+ %a:_(s128) = COPY $q0
+ %rhs:_(s128) = COPY $q1
+ %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+ %lhs:_(s128) = G_AND %a, %mask
+ %res:_(s128) = G_ADD %lhs, %rhs
+ $q0 = COPY %res(s128)
+...
+
+---
+name: add_s128_rhs_low64_known_zero_bits
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: add_s128_rhs_low64_known_zero_bits
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:_(s128) = COPY $q0
+ ; CHECK-NEXT: %b:_(s128) = COPY $q1
+ ; CHECK-NEXT: %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+ ; CHECK-NEXT: %rhs:_(s128) = G_AND %b, %mask
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %lhs(s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %rhs(s128)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[UV1]], [[UV3]]
+ ; CHECK-NEXT: %res:_(s128) = G_MERGE_VALUES [[UV]](s64), [[ADD]](s64)
+ ; CHECK-NEXT: $q0 = COPY %res(s128)
+ %lhs:_(s128) = COPY $q0
+ %b:_(s128) = COPY $q1
+ %mask:_(s128) = G_CONSTANT i128 -18446744073709551616
+ %rhs:_(s128) = G_AND %b, %mask
+ %res:_(s128) = G_ADD %lhs, %rhs
+ $q0 = COPY %res(s128)
+...
``````````
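
Two details are worth calling out when reading the tests. First, the combine only fires when the full width is not a legal integer for the target's DataLayout but the half width is; AArch64's DataLayout lists only 32- and 64-bit native integer widths, so the s128 adds are rewritten while the s64 case with known-zero low bits is deliberately left alone. Second, the match condition is that the extracted low half of one operand's known bits is the constant zero. A small sketch of what that query sees for the masked operand, mirroring the `G_AND %a, %mask` tests (standalone against llvm/Support/KnownBits.h; uses only the documented KnownBits API, so treat it as an assumption-laden illustration, not part of the patch):

```cpp
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // %lhs = G_AND %a, 0xFFFF...0000: even with %a completely unknown,
  // the AND forces the low 64 bits of %lhs to known zero.
  KnownBits Lhs(128);
  Lhs.Zero = APInt::getLowBitsSet(128, 64);

  // The check the matcher performs on each operand's low half.
  const KnownBits Lo = Lhs.extractBits(/*NumBits=*/64, /*BitPosition=*/0);
  assert(Lo.isConstant() && Lo.getConstant().isZero());

  // The high half stays unknown, which is fine: it feeds the narrow G_ADD.
  const KnownBits Hi = Lhs.extractBits(64, 64);
  assert(!Hi.isConstant());
  return 0;
}
```

On the rebuild side, G_MERGE_VALUES takes its parts from least to most significant, which is why the low unmerge result of the unmasked operand comes first and the narrow G_ADD result second in the CHECK lines above.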
https://github.com/llvm/llvm-project/pull/101327