[llvm] [AArch64] MachineCombiner msub matching (PR #84267)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 02:15:56 PST 2024
https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/84267
>From 3a62edcf527bfc927f148c99b9d37d3d199db611 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Wed, 6 Mar 2024 14:09:18 -0500
Subject: [PATCH 1/2] [AArch64] MachineCombiner msub matching
Patterns should be sorted in priority order, since the pattern evaluator
stops checking as soon as it finds a faster sequence.
So for a * b - c * d, we prefer to match the 2nd operand of the sub first,
so that msub can be used to fold the multiply into the subtraction.
Refer to https://www.slideshare.net/chimerawang/instruction-combine-in-llvm
Fix https://github.com/llvm/llvm-project/issues/84152
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
llvm/test/CodeGen/AArch64/scalar-mla-mls.ll | 31 ++++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5df691f35275df..5893f76dbd5544 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6110,8 +6110,8 @@ static bool getMaddPatterns(MachineInstr &Root,
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
break;
case AArch64::SUBWrr:
- setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
+ setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
break;
case AArch64::SUBXrr:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
diff --git a/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll b/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
new file mode 100644
index 00000000000000..36ac36701fa8aa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define ptr @test_scalar_msub(ptr %a, ptr %b) {
+; CHECK-LABEL: test_scalar_msub:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldp w8, w11, [x1]
+; CHECK-NEXT: ldp w9, w10, [x0]
+; CHECK-NEXT: mul w12, w8, w9
+; CHECK-NEXT: mul w8, w10, w8
+; CHECK-NEXT: madd w8, w11, w9, w8
+; CHECK-NEXT: msub w9, w11, w10, w12
+; CHECK-NEXT: stp w9, w8, [x0]
+; CHECK-NEXT: ret
+entry:
+ %0 = load i32, ptr %a, align 4
+ %1 = load i32, ptr %b, align 4
+ %mul = mul nsw i32 %1, %0
+ %_M_imag = getelementptr inbounds i8, ptr %a, i64 4
+ %2 = load i32, ptr %_M_imag, align 4
+ %_M_imag.i = getelementptr inbounds i8, ptr %b, i64 4
+ %3 = load i32, ptr %_M_imag.i, align 4
+ %mul3 = mul nsw i32 %3, %2
+ %sub = sub nsw i32 %mul, %mul3
+ %mul6 = mul nsw i32 %3, %0
+ %mul9 = mul nsw i32 %2, %1
+ %add = add nsw i32 %mul6, %mul9
+ store i32 %add, ptr %_M_imag, align 4
+ store i32 %sub, ptr %a, align 4
+ ret ptr %a
+}
>From a110a1c0ed9850be168cd0e29f05179e80941b04 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Fri, 8 Mar 2024 11:27:48 -0500
Subject: [PATCH 2/2] [AArch64] MachineCombiner msub matching for i64
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
llvm/test/CodeGen/AArch64/scalar-mla-mls.ll | 32 ++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5893f76dbd5544..02943b8a4ab158 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6114,8 +6114,8 @@ static bool getMaddPatterns(MachineInstr &Root,
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
break;
case AArch64::SUBXrr:
- setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
+ setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
break;
case AArch64::ADDWri:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
diff --git a/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll b/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
index 36ac36701fa8aa..c8ba50ae0cc3ca 100644
--- a/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
+++ b/llvm/test/CodeGen/AArch64/scalar-mla-mls.ll
@@ -29,3 +29,35 @@ entry:
store i32 %sub, ptr %a, align 4
ret ptr %a
}
+
+define ptr @test_scalar_msub_i64(ptr %a, ptr %b) {
+; CHECK-LABEL: test_scalar_msub_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr x8, [x1]
+; CHECK-NEXT: ldur x9, [x0, #4]
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: ldur x12, [x1, #4]
+; CHECK-NEXT: mul x11, x9, x8
+; CHECK-NEXT: mul x8, x8, x10
+; CHECK-NEXT: madd x10, x12, x10, x11
+; CHECK-NEXT: msub x8, x12, x9, x8
+; CHECK-NEXT: stur x10, [x0, #4]
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr %a, align 8
+ %1 = load i64, ptr %b, align 8
+ %mul = mul nsw i64 %1, %0
+ %_M_imag = getelementptr inbounds i8, ptr %a, i64 4
+ %2 = load i64, ptr %_M_imag, align 8
+ %_M_imag.i = getelementptr inbounds i8, ptr %b, i64 4
+ %3 = load i64, ptr %_M_imag.i, align 8
+ %mul3 = mul nsw i64 %3, %2
+ %sub = sub nsw i64 %mul, %mul3
+ %mul6 = mul nsw i64 %3, %0
+ %mul9 = mul nsw i64 %2, %1
+ %add = add nsw i64 %mul6, %mul9
+ store i64 %add, ptr %_M_imag, align 8
+ store i64 %sub, ptr %a, align 8
+ ret ptr %a
+}
More information about the llvm-commits
mailing list