[llvm] [AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz (PR #92915)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 29 05:48:00 PDT 2024
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/92915
>From 3d354d0aba75278181d758d0241c5c09294c8dcb Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 12 Apr 2024 20:40:41 +0000
Subject: [PATCH] [AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001),
0xffff) to CMLTz
This patch mirrors the following SelectionDAG patch for GlobalISel:
https://reviews.llvm.org/D130874
---
llvm/lib/Target/AArch64/AArch64Combine.td | 11 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 55 +++++++++
llvm/test/CodeGen/AArch64/mulcmle.ll | 114 ++++--------------
3 files changed, 90 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1c7f6b870d390..1ce6cdf1c1e1e 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
>;
+// Mul(And(LShr(X, H-1), 1|1<<H), mask(H)) -> CMLTz, H = half the element bits.
+def combine_mul_cmlt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -296,5 +304,6 @@ def AArch64PostLegalizerCombiner
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs,
- push_freeze_to_prevent_poison_from_propagating]> {
+ push_freeze_to_prevent_poison_from_propagating,
+ combine_mul_cmlt]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..7f3e0e01ccd25 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,61 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+// Matches Mul(And(LShr(X, H - 1), 1 | 1 << H), mask(H)), where H is half the
+// scalar width of a supported vector G_MUL result; on success SrcReg is X.
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         Register &SrcReg) {
+  const LLT MulTy = MRI.getType(MI.getOperand(0).getReg());
+  const bool IsHandledTy =
+      MulTy == LLT::fixed_vector(2, 64) || MulTy == LLT::fixed_vector(2, 32) ||
+      MulTy == LLT::fixed_vector(4, 32) || MulTy == LLT::fixed_vector(4, 16) ||
+      MulTy == LLT::fixed_vector(8, 16);
+  if (!IsHandledTy)
+    return false;
+
+  // Operand 1 of the G_MUL must be a G_AND whose operand 1 is a G_LSHR.
+  auto *AndDef = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (AndDef->getOpcode() != TargetOpcode::G_AND)
+    return false;
+  auto *ShrDef = getDefIgnoringCopies(AndDef->getOperand(1).getReg(), MRI);
+  if (ShrDef->getOpcode() != TargetOpcode::G_LSHR)
+    return false;
+  // All three right-hand operands must be constants or constant splats.
+  const auto MulCst = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+  const auto AndCst = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(AndDef->getOperand(2).getReg()), MRI);
+  const auto ShrCst = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(ShrDef->getOperand(2).getReg()), MRI);
+  if (!MulCst || !AndCst || !ShrCst)
+    return false;
+  const unsigned Half = MulTy.getScalarSizeInBits() / 2;
+  if (!(MulCst->isMask(Half) && *AndCst == (1ULL | 1ULL << Half) &&
+        *ShrCst == Half - 1))
+    return false;
+  SrcReg = ShrDef->getOperand(1).getReg();
+  return true;
+}
+
+// Replaces the matched G_MUL with a signed "less than zero" compare of SrcReg,
+// evaluated on a vector with twice the elements at half the scalar width.
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B, Register &SrcReg) {
+  const Register MulDst = MI.getOperand(0).getReg();
+  const LLT WideTy = MRI.getType(MulDst);
+  const auto NarrowCount = WideTy.getElementCount().multiplyCoefficientBy(2);
+  const LLT NarrowTy = WideTy.changeElementCount(NarrowCount)
+                           .changeElementSize(WideTy.getScalarSizeInBits() / 2);
+  // Emit, in order: the zero constant, the narrowing bitcast, and the compare.
+  auto Zero = B.buildConstant(NarrowTy, 0);
+  auto Narrowed = B.buildInstr(TargetOpcode::G_BITCAST, {NarrowTy}, {SrcReg});
+  auto IsNeg =
+      B.buildICmp(CmpInst::Predicate::ICMP_SLT, NarrowTy, Narrowed, Zero);
+  // Bitcast the compare result into the G_MUL's register, then erase the G_MUL.
+  B.buildInstr(TargetOpcode::G_BITCAST, {MulDst}, {IsNeg});
+  MI.eraseFromParent();
+}
+
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index b22c75259adf2..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -24,26 +24,10 @@ define <1 x i64> @v1i64(<1 x i64> %a) {
}
define <2 x i64> @v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: v2i64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v2i64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.4s, #1
-; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #31
-; CHECK-GI-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: fmov x11, d2
-; CHECK-GI-NEXT: mov x9, v2.d[1]
-; CHECK-GI-NEXT: fmov x10, d0
-; CHECK-GI-NEXT: mov x8, v0.d[1]
-; CHECK-GI-NEXT: mul x10, x10, x11
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: fmov d0, x10
-; CHECK-GI-NEXT: mov v0.d[1], x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: ret
%b = lshr <2 x i64> %a, <i64 31, i64 31>
%c = and <2 x i64> %b, <i64 4294967297, i64 4294967297>
%d = mul nuw <2 x i64> %c, <i64 4294967295, i64 4294967295>
@@ -51,19 +35,10 @@ define <2 x i64> @v2i64(<2 x i64> %a) {
}
define <2 x i32> @v2i32(<2 x i32> %a) {
-; CHECK-SD-LABEL: v2i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v2i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.4h, #1
-; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #15
-; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
%b = lshr <2 x i32> %a, <i32 15, i32 15>
%c = and <2 x i32> %b, <i32 65537, i32 65537>
%d = mul nuw <2 x i32> %c, <i32 65535, i32 65535>
@@ -71,19 +46,10 @@ define <2 x i32> @v2i32(<2 x i32> %a) {
}
define <4 x i32> @v4i32(<4 x i32> %a) {
-; CHECK-SD-LABEL: v4i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.8h, #1
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
-; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mul v0.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: ret
%b = lshr <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15>
%c = and <4 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <4 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -91,23 +57,11 @@ define <4 x i32> @v4i32(<4 x i32> %a) {
}
define <8 x i32> @v8i32(<8 x i32> %a) {
-; CHECK-SD-LABEL: v8i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: cmlt v1.8h, v1.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
-; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #15
-; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s
-; CHECK-GI-NEXT: mul v1.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
+; CHECK-NEXT: ret
%b = lshr <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
%c = and <8 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <8 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -115,19 +69,10 @@ define <8 x i32> @v8i32(<8 x i32> %a) {
}
define <4 x i16> @v4i16(<4 x i16> %a) {
-; CHECK-SD-LABEL: v4i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.8b, #1
-; CHECK-GI-NEXT: ushr v0.4h, v0.4h, #7
-; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
%b = lshr <4 x i16> %a, <i16 7, i16 7, i16 7, i16 7>
%c = and <4 x i16> %b, <i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <4 x i16> %c, <i16 255, i16 255, i16 255, i16 255>
@@ -135,19 +80,10 @@ define <4 x i16> @v4i16(<4 x i16> %a) {
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.16b, #1
-; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #7
-; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
%b = lshr <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%c = and <8 x i16> %b, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <8 x i16> %c, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
More information about the llvm-commits
mailing list