[llvm] [AArch64] Match constants in SelectSMETileSlice (PR #151494)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 03:57:31 PDT 2025
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/151494
If the slice is a constant, SelectSMETileSlice should try to use the `WZR + <imm>`
addressing mode when the constant fits the immediate range.
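To illustrate (taken from the test changes below; the exact register numbers are simply whatever the allocator picks), a constant slice of 7 previously materialised the constant into the base register:

    mov w8, #7 // =0x7
    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]

With this change the constant is folded into the immediate offset and the base register is zeroed instead:

    mov w8, wzr
    fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]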
>From ca2bea536c110bf2b39e3b0742b992ed0a108040 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 31 Jul 2025 10:49:52 +0000
Subject: [PATCH 1/2] Pre-commit test
---
.../AArch64/sme-tileslice-addrmodes.ll | 56 +++++++++++++++++++
1 file changed, 56 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
new file mode 100644
index 0000000000000..aaee521a8bd16
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sme2 -enable-subreg-liveness < %s| FileCheck %s
+
+target triple = "aarch64"
+
+define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_constant_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: add w9, w0, #8
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ %slice0 = add i32 %slice, 0
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice1 = add i32 %slice, 1
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice7 = add i32 %slice, 7
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice8 = add i32 %slice, 8
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_zero_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_zero_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32 immarg)
>From 722d30afba0b5aba825d5b4cfec7fe953e190bcb Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 31 Jul 2025 10:39:23 +0000
Subject: [PATCH 2/2] [AArch64] Match constants in SelectSMETileSlice
If the slice is a constant, SelectSMETileSlice should try to use the WZR + <imm>
addressing mode when the constant fits the immediate range.
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 29 ++++++++++++++-----
.../AArch64/sme-tileslice-addrmodes.ll | 8 ++---
2 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ad42f4b56caf2..bc786f415b554 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7617,16 +7617,29 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
SDValue &Base, SDValue &Offset,
unsigned Scale) {
- // Try to untangle an ADD node into a 'reg + offset'
- if (CurDAG->isBaseWithConstantOffset(N))
- if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
+ if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
int64_t ImmOff = C->getSExtValue();
- if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
- Base = N.getOperand(0);
- Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
- return true;
- }
+ if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
+ return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
}
+ return SDValue();
+ };
+
+ if (SDValue C = MatchConstantOffset(N)) {
+ Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
+ Offset = C;
+ return true;
+ }
+
+ // Try to untangle an ADD node into a 'reg + offset'
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
+ Base = N.getOperand(0);
+ Offset = C;
+ return true;
+ }
+ }
// By default, just match reg + 0.
Base = N;
diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
index aaee521a8bd16..cfe8e9ec4a0b6 100644
--- a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -7,13 +7,11 @@ define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <
; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT: mov w8, #8 // =0x8
-; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)