[llvm] [AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz (PR #92915)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 06:28:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/92915.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+10-1)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp (+54)
- (modified) llvm/test/CodeGen/AArch64/mulcmle.ll (+16-5)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..bee9b07b9d230 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
>;
+// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
+// The rule is rooted at any G_MUL; the rest of the pattern is checked in C++
+// by matchCombineMulCMLT, which hands a single register to
+// applyCombineMulCMLT through $matchinfo.
+def combine_mul_cmlt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -295,5 +303,6 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs,
+ combine_mul_cmlt]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..82f2904ad8d43 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,60 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+/// Matches G_MUL(G_AND(G_LSHR(X, HalfSize - 1), 1 | (1 << HalfSize)), Mask),
+/// where HalfSize is half the scalar width of the multiply's result type and
+/// Mask satisfies APInt::isMask(HalfSize). For 32-bit elements this is
+/// Mul(And(Srl(X, 15), 0x10001), 0xffff).
+/// \param MI the multiply instruction to inspect.
+/// \param MRI used to look up register types and defining instructions.
+/// \param SrcReg set to X (operand 1 of the G_LSHR) when the match succeeds.
+/// \returns true if the whole pattern matched, false otherwise.
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &SrcReg) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // Only these fixed vector result types are handled: 2 x 64, 2 x 32, 4 x 32,
+ // 4 x 16 and 8 x 16.
+ if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
+ DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
+ DstTy != LLT::fixed_vector(8, 16))
+ return false;
+
+ // Operand 1 of the multiply must be defined by a G_AND, and operand 1 of
+ // that G_AND must be defined by a G_LSHR.
+ // NOTE(review): AndMI and LShrMI are dereferenced without a null check;
+ // confirm getDefIgnoringCopies cannot return null for these operands.
+ auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ if (AndMI->getOpcode() != TargetOpcode::G_AND)
+ return false;
+ auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
+ if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
+ return false;
+
+ // Check the constant splat values
+ // V1, V2 and V3 come from operand 2 of the multiply, the AND and the shift
+ // respectively; only the second operand of each instruction is considered.
+ // NOTE(review): these defs are fetched with getVRegDef rather than
+ // getDefIgnoringCopies as above - confirm the difference is intended.
+ auto V1 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+ auto V2 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
+ auto V3 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
+ if (!V1.has_value() || !V2.has_value() || !V3.has_value())
+ return false;
+ // HalfSize is half the element width of the result type.
+ unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
+ // Reject unless the multiplier passes isMask(HalfSize), the AND constant is
+ // bit 0 plus bit HalfSize, and the shift amount is HalfSize - 1.
+ if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
+ V3 != (HalfSize - 1))
+ return false;
+
+ // Hand the shifted value X to the apply step.
+ SrcReg = LShrMI->getOperand(1).getReg();
+
+ return true;
+}
+
+/// Replaces the multiply MI with a signed "less than zero" compare of SrcReg.
+/// The compare is done on a vector type with twice as many elements of half
+/// the width as MI's result type, and its result is bitcast into MI's original
+/// destination register. MI is erased afterwards.
+/// \param MI the multiply instruction being replaced.
+/// \param MRI used to look up the type of the destination register.
+/// \param B builder used to create the replacement instructions.
+/// \param SrcReg the register to compare against zero.
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, Register &SrcReg) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ // HalfTy has double the element count and half the element size of DstTy.
+ LLT HalfTy =
+ DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
+ .changeElementSize(DstTy.getScalarSizeInBits() / 2);
+
+ // Build a zero constant of HalfTy and reinterpret the source as HalfTy.
+ Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
+ Register CastReg =
+ B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
+ // Signed less-than compare of the reinterpreted source against zero; the
+ // compare result is also given type HalfTy.
+ Register CMLTReg =
+ B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
+ .getReg(0);
+
+ // Bitcast the compare result into the original destination register.
+ // NOTE(review): the value returned by the trailing getReg(0) is unused.
+ B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
+ MI.eraseFromParent();
+}
+
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 5c216b8550080..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,11 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <1 x i64> @v1i64(<1 x i64> %a) {
-; CHECK-LABEL: v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: lsr x8, x8, #31
+; CHECK-GI-NEXT: and x8, x8, #0x100000001
+; CHECK-GI-NEXT: lsl x9, x8, #32
+; CHECK-GI-NEXT: sub x8, x9, x8
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%b = lshr <1 x i64> %a, <i64 31>
%c = and <1 x i64> %b, <i64 4294967297>
%d = mul nuw <1 x i64> %c, <i64 4294967295>
``````````
</details>
https://github.com/llvm/llvm-project/pull/92915
More information about the llvm-commits
mailing list