[llvm] [DAGCombiner] Fix crash in reassociationCanBreakAddressingModePattern for multi-memop nodes (PR #180268)
Alexander Weinrauch via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 08:07:51 PST 2026
https://github.com/AlexAUT updated https://github.com/llvm/llvm-project/pull/180268
>From 03135f6c0670ccef15977278aa5a737bbe6b640b Mon Sep 17 00:00:00 2001
From: Alexander Weinrauch <alexander.weinrauch at amd.com>
Date: Fri, 6 Feb 2026 18:41:04 +0000
Subject: [PATCH 1/4] Bail out for multi mem operands ops
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++-
.../dagcombine-reassociate-multi-memop.ll | 29 +++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 58c59628342c7..c43acb1780d37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1145,7 +1145,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
ScalableOffset = -ScalableOffset;
if (all_of(N->users(), [&](SDNode *Node) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
- LoadStore && LoadStore->getBasePtr().getNode() == N) {
+ LoadStore && LoadStore->hasUniqueMemOperand() &&
+ LoadStore->getBasePtr().getNode() == N) {
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.ScalableOffset = ScalableOffset;
@@ -1183,6 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
for (SDNode *Node : N->users()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ if (!LoadStore->hasUniqueMemOperand())
+ continue;
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
new file mode 100644
index 0000000000000..f19eeca3065ec
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
+
+; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
+; crash when a MemSDNode user has multiple memory operands (e.g.
+; buffer_load_lds which reads from a buffer and writes to LDS).
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i16, i64, i32)
+declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+define amdgpu_kernel void @triton_mm_minimal(ptr addrspace(1) inreg %ptr) {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ ; Create a pattern that will be reassociated: (add (add base, 1024), 32)
+ ; where base comes from mul, creating nested adds
+ %base = mul i32 %tid, 1536
+ %add1 = add i32 %base, 1024
+ %offset1 = add i32 %add1, 32
+ %offset2 = add i32 %add1, 33
+ %shl1 = shl i32 %offset1, 1
+ %shl2 = shl i32 %offset2, 1
+ %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %ptr, i16 0, i64 2147483646, i32 159744)
+ %lds0 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 0
+ %lds1 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 1056
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds0, i32 16, i32 %shl1, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds1, i32 16, i32 %shl2, i32 0, i32 0, i32 0)
+ ret void
+}
>From 03579adc18281c8ac89be3e3dcaf184237bb29b5 Mon Sep 17 00:00:00 2001
From: Alexander Weinrauch <alexander.weinrauch at amd.com>
Date: Fri, 6 Feb 2026 19:36:52 +0000
Subject: [PATCH 2/4] Adjust lit test
---
.../CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
index f19eeca3065ec..38cb4f9dc6c56 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
; crash when a MemSDNode user has multiple memory operands (e.g.
@@ -10,6 +10,9 @@ declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-LABEL: triton_mm_minimal:
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
define amdgpu_kernel void @triton_mm_minimal(ptr addrspace(1) inreg %ptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Create a pattern that will be reassociated: (add (add base, 1024), 32)
>From df09ee75b5ba26dc4e323b7e6e42853d52581d48 Mon Sep 17 00:00:00 2001
From: Alexander Weinrauch <alexander.weinrauch at amd.com>
Date: Fri, 6 Feb 2026 19:38:20 +0000
Subject: [PATCH 3/4] Rename lit test
---
.../CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
index 38cb4f9dc6c56..8df77062003a6 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -10,10 +10,10 @@ declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-LABEL: triton_mm_minimal:
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-define amdgpu_kernel void @triton_mm_minimal(ptr addrspace(1) inreg %ptr) {
+; CHECK-LABEL: buffer_load_lds_reassociate_offsets:
+; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
+; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
+define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Create a pattern that will be reassociated: (add (add base, 1024), 32)
; where base comes from mul, creating nested adds
>From 20cd9b28c2932c226f359fa6a34d6177193c77a9 Mon Sep 17 00:00:00 2001
From: Alexander Weinrauch <alexander.weinrauch at amd.com>
Date: Tue, 10 Feb 2026 16:07:19 +0000
Subject: [PATCH 4/4] Autogenerate checks in lit test
---
.../dagcombine-reassociate-multi-memop.ll | 26 ++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
index 8df77062003a6..55159634eb4e5 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-multi-memop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck %s
; Test that DAGCombiner::reassociationCanBreakAddressingModePattern does not
@@ -10,10 +11,29 @@ declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1), i
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8), ptr addrspace(3) nocapture, i32, i32, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-LABEL: buffer_load_lds_reassociate_offsets:
-; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
-; CHECK: buffer_load_dwordx4 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen lds
define amdgpu_kernel void @buffer_load_lds_reassociate_offsets(ptr addrspace(1) inreg %ptr) {
+; CHECK-LABEL: buffer_load_lds_reassociate_offsets:
+; CHECK: ; %bb.1:
+; CHECK-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_branch .LBB0_0
+; CHECK-NEXT: .p2align 8
+; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: .LBB0_0:
+; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; CHECK-NEXT: v_mul_u32_u24_e32 v0, 0x600, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_add_u32_e32 v1, 0x840, v0
+; CHECK-NEXT: s_and_b32 s9, s9, 0xffff
+; CHECK-NEXT: s_mov_b32 s11, 0x27000
+; CHECK-NEXT: s_mov_b32 s10, 0x7ffffffe
+; CHECK-NEXT: s_mov_b32 m0, 0
+; CHECK-NEXT: v_add_u32_e32 v0, 0x842, v0
+; CHECK-NEXT: buffer_load_dwordx4 v1, s[8:11], 0 offen lds
+; CHECK-NEXT: s_add_i32 m0, 0, 0x420
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: buffer_load_dwordx4 v0, s[8:11], 0 offen lds
+; CHECK-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Create a pattern that will be reassociated: (add (add base, 1024), 32)
; where base comes from mul, creating nested adds
More information about the llvm-commits
mailing list