[llvm] [AArch64] Match constants in SelectSMETileSlice (PR #151494)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 03:58:00 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
If the slice index is a constant, instruction selection should try to use the
`WZR + <imm>` addressing mode whenever the constant fits within the immediate range.
---
Full diff: https://github.com/llvm/llvm-project/pull/151494.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+21-8)
- (added) llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll (+54)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ad42f4b56caf2..bc786f415b554 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7617,16 +7617,29 @@ bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
SDValue &Base, SDValue &Offset,
unsigned Scale) {
- // Try to untangle an ADD node into a 'reg + offset'
- if (CurDAG->isBaseWithConstantOffset(N))
- if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
+ if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
int64_t ImmOff = C->getSExtValue();
- if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
- Base = N.getOperand(0);
- Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
- return true;
- }
+ if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
+ return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
}
+ return SDValue();
+ };
+
+ if (SDValue C = MatchConstantOffset(N)) {
+ Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
+ Offset = C;
+ return true;
+ }
+
+ // Try to untangle an ADD node into a 'reg + offset'
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
+ Base = N.getOperand(0);
+ Offset = C;
+ return true;
+ }
+ }
// By default, just match reg + 0.
Base = N;
diff --git a/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
new file mode 100644
index 0000000000000..cfe8e9ec4a0b6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-tileslice-addrmodes.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sme2 -enable-subreg-liveness < %s| FileCheck %s
+
+target triple = "aarch64"
+
+define void @sme_tileslice_addrmode_zero_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_zero_base_plus_constant_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: mov w9, #8 // =0x8
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_constant_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_constant_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: add w9, w0, #8
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: fdot za.s[w9, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ %slice0 = add i32 %slice, 0
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice0, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice1 = add i32 %slice, 1
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice1, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice7 = add i32 %slice, 7
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice7, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ %slice8 = add i32 %slice, 8
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice8, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+define void @sme_tileslice_addrmode_base_plus_zero_offset(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: sme_tileslice_addrmode_base_plus_zero_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[0]
+; CHECK-NEXT: ret
+ tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, <vscale x 8 x half> %4, i32 0)
+ ret void
+}
+
+declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32 immarg)
``````````
</details>
https://github.com/llvm/llvm-project/pull/151494
More information about the llvm-commits
mailing list