[llvm] [AArch64] Match constants in SelectSMETileSlice (PR #151494)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 11 01:22:40 PDT 2025


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/151494

>From ca2bea536c110bf2b39e3b0742b992ed0a108040 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 31 Jul 2025 10:49:52 +0000
Subject: [PATCH 1/3] Pre-commit test

---
 .../AArch64/sme-tileslice-addrmodes.ll        | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll

diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
new file mode 100644
index 0000000000000..aaee521a8bd16
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sme2 -enable-subreg-liveness < %s| FileCheck %s
+
+target triple = "aarch64"
+
+define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    mov w8, #7 // =0x7
+; CHECK-NEXT:    fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    ret
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_constant_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    add w9, w0, #8
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    ret
+  %slice0 = add i32 %slice, 0
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  %slice1 = add i32 %slice, 1
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  %slice7 = add i32 %slice, 7
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  %slice8 = add i32 %slice, 8
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_zero_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_zero_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    ret
+  tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+  ret void
+}
+
+declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32 immarg)

>From 722d30afba0b5aba825d5b4cfec7fe953e190bcb Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 31 Jul 2025 10:39:23 +0000
Subject: [PATCH 2/3] [AArch64] Match constants in SelectSMETileSlice

If the slice index is a constant that fits the immediate range of the
tile-slice addressing mode, select it as WZR + <imm> rather than
materialising the constant in a separate register.
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 29 ++++++++++++++-----
 .../AArch64/sme-tileslice-addrmodes.ll        |  8 ++---
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ad42f4b56caf2..bc786f415b554 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7617,16 +7617,29 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                              SDValue &Base, SDValue &Offset,
                                              unsigned Scale) {
-  // Try to untangle an ADD node into a 'reg + offset'
-  if (CurDAG->isBaseWithConstantOffset(N))
-    if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+  auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
+    if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
       int64_t ImmOff = C->getSExtValue();
-      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
-        Base = N.getOperand(0);
-        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
-        return true;
-      }
+      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
+        return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
     }
+    return SDValue();
+  };
+
+  if (SDValue C = MatchConstantOffset(N)) {
+    Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
+    Offset = C;
+    return true;
+  }
+
+  // Try to untangle an ADD node into a 'reg + offset'
+  if (CurDAG->isBaseWithConstantOffset(N)) {
+    if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
+      Base = N.getOperand(0);
+      Offset = C;
+      return true;
+    }
+  }
 
   // By default, just match reg + 0.
   Base = N;
diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
index aaee521a8bd16..cfe8e9ec4a0b6 100644
--- a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -7,13 +7,11 @@ define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <
 ; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    mov w9, #8 // =0x8
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT:    mov w8, #7 // =0x7
+; CHECK-NEXT:    fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
-; CHECK-NEXT:    mov w8, #8 // =0x8
-; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    ret
   tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
   tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)

>From 39a622467119922f78a71b79652cee4eb13039db Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 11 Aug 2025 08:20:38 +0000
Subject: [PATCH 3/3] Add comment to test

---
 llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
index cfe8e9ec4a0b6..4af3fabb2b4d5 100644
--- a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -3,6 +3,8 @@
 
 target triple = "aarch64"
 
+; The tile-slice addressing mode supports an immediate of 0-7.
+; This is testing an immediate of 0, 1, 7 (folded) and 8 (not folded).
 define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
 ; CHECK:       // %bb.0:
@@ -20,6 +22,8 @@ define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <
   ret void
 }
 
+; The tile-slice addressing mode supports an immediate of 0-7.
+; This is testing an immediate of 0, 1, 7 (folded) and 8 (not folded).
 define void @sme_tileslice_addrmode_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: sme_tileslice_addrmode_base_plus_constant_offset:
 ; CHECK:       // %bb.0:



More information about the llvm-commits mailing list