[llvm] [AArch64][SME] Preserve `Chain` argument in SelectMultiVectorLutiLane (PR #161494)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 02:20:55 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/161494
Previously, the `Chain` was dropped leading to two identical LUTI4 nodes that would be incorrectly CSE'd.
Fixes: #161420
>From b07ff80a54ce5535247c7467f4bc943f2bb7ceef Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 1 Oct 2025 09:16:00 +0000
Subject: [PATCH] [AArch64][SME] Preserve `Chain` argument in
SelectMultiVectorLutiLane
Previously, the `Chain` was dropped leading to two identical LUTI4 nodes
that would be incorrectly CSE'd.
Fixes: #161420
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 3 +-
llvm/test/CodeGen/AArch64/pr161420.ll | 56 +++++++++++++++++++
2 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/pr161420.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6a1b06eea4309..8d872eb85da22 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2089,7 +2089,8 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
return;
- SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
SDLoc DL(Node);
EVT VT = Node->getValueType(0);
diff --git a/llvm/test/CodeGen/AArch64/pr161420.ll b/llvm/test/CodeGen/AArch64/pr161420.ll
new file mode 100644
index 0000000000000..ebab7e0f42e77
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr161420.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx15.0.0"
+
+; From https://github.com/llvm/llvm-project/issues/161420. This test checks that
+; two `luti4` instructions are emitted. Previously, we would incorrectly CSE
+; one as the `Chain` argument was dropped in SelectionDAG.
+
+define void @pluto(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) #0 {
+; CHECK-LABEL: pluto:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: mov w8, #0 ; =0x0
+; CHECK-NEXT: ldr zt0, [x1]
+; CHECK-NEXT: ldr z4, [x3]
+; CHECK-NEXT: ptrue pn8.h
+; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0]
+; CHECK-NEXT: luti4 { z16.h - z19.h }, zt0, z4[0]
+; CHECK-NEXT: fmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z16.h - z19.h }
+; CHECK-NEXT: ldr zt0, [x2]
+; CHECK-NEXT: luti4 { z4.h - z7.h }, zt0, z4[0]
+; CHECK-NEXT: fmla za.h[w8, 2, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
+; CHECK-NEXT: ret
+bb:
+ tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %arg1)
+ %load = load <vscale x 16 x i8>, ptr %arg3, align 16
+ %call = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+ %call4 = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") %call, ptr %arg)
+ %extractvalue = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call4, 0
+ %extractvalue5 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call4, 1
+ %extractvalue6 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call4, 2
+ %extractvalue7 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call4, 3
+ %call8 = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, <vscale x 16 x i8> %load, i32 0)
+ %extractvalue9 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call8, 0
+ %extractvalue10 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call8, 1
+ %extractvalue11 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call8, 2
+ %extractvalue12 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call8, 3
+ tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 0, <vscale x 8 x half> %extractvalue, <vscale x 8 x half> %extractvalue5, <vscale x 8 x half> %extractvalue6, <vscale x 8 x half> %extractvalue7, <vscale x 8 x half> %extractvalue9, <vscale x 8 x half> %extractvalue10, <vscale x 8 x half> %extractvalue11, <vscale x 8 x half> %extractvalue12)
+ tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %arg2)
+ %call13 = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, <vscale x 16 x i8> %load, i32 0)
+ %extractvalue14 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call13, 0
+ %extractvalue15 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call13, 1
+ %extractvalue16 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call13, 2
+ %extractvalue17 = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %call13, 3
+ tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 2, <vscale x 8 x half> %extractvalue, <vscale x 8 x half> %extractvalue5, <vscale x 8 x half> %extractvalue6, <vscale x 8 x half> %extractvalue7, <vscale x 8 x half> %extractvalue14, <vscale x 8 x half> %extractvalue15, <vscale x 8 x half> %extractvalue16, <vscale x 8 x half> %extractvalue17)
+ ret void
+}
+
+declare void @llvm.aarch64.sme.ldr.zt(i32, ptr)
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount"), ptr)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 immarg, <vscale x 16 x i8>, i32 immarg)
+declare void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn uwtable(sync) "aarch64_inout_za" "aarch64_inout_zt0" "aarch64_pstate_sm_enabled" "target-cpu"="apple-m1" "target-features"="+fp-armv8,+lse,+neon,+sme,+sme-f16f16,+sme2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a" }
More information about the llvm-commits
mailing list