[llvm] [AMDGPU][InstCombine] Fold unused m0 operand to poison for sendmsg intrinsics (PR #183755)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 01:27:07 PST 2026
https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/183755
>From 4b5cd452b85ff75d6bc6122309369f645d4d8619 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:02:11 +0530
Subject: [PATCH 01/18] Fold unused sendmsg m0 operand to poison
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 26 ++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 82de8cf169b48..bc9ce55b34b22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1194,7 +1194,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case KnownIEEEMode::Unknown:
break;
}
- }
+}
if (V) {
if (auto *CI = dyn_cast<CallInst>(V)) {
@@ -1448,6 +1448,30 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// amdgcn.kill(i1 1) is a no-op
return IC.eraseInstFromFunction(II);
}
+ case Intrinsic::amdgcn_s_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ // The second operand is copied to m0, but is only actually used for
+ // GS_ALLOC_REQ. For other message types, fold it to poison.
+ using namespace AMDGPU::SendMsg;
+
+ Value *M0Val = II.getArgOperand(1);
+ if (isa<PoisonValue>(M0Val))
+ break;
+
+ auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
+ uint64_t Msg = MsgImm->getZExtValue();
+ // Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
+ // the lower 8 bits. Since ID_GS_ALLOC_REQ is 9, we need to check the
+ // appropriate mask. For simplicity, extract the lower 8 bits which covers
+ // both cases.
+ uint64_t MsgId = Msg & ID_MASK_GFX11Plus_;
+
+ // Only GS_ALLOC_REQ uses the m0 value.
+ if (MsgId == ID_GS_ALLOC_REQ)
+ break;
+
+ return IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+ }
case Intrinsic::amdgcn_update_dpp: {
Value *Old = II.getArgOperand(0);
>From 323b1be63d7545198a462d09e944a24a2577639b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:04:52 +0530
Subject: [PATCH 02/18] Add tests in sendmsg-m0-poison.ll
---
.../InstCombine/AMDGPU/sendmsg-m0-poison.ll | 118 ++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
new file mode 100644
index 0000000000000..8a7a2a7b5253e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S | FileCheck %s
+
+; Test that the m0 operand is folded to poison for message types that don't use it.
+
+; MSG_INTERRUPT (1) doesn't use m0
+define void @test_sendmsg_interrupt(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_interrupt(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %val)
+ ret void
+}
+
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0
+define void @test_sendmsg_gs_emit(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_emit(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 34, i32 %val)
+ ret void
+}
+
+; MSG_GS_DONE (3) doesn't use m0
+define void @test_sendmsg_gs_done(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_done(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
+ ret void
+}
+
+; MSG_SYSMSG (15) doesn't use m0
+define void @test_sendmsg_sysmsg(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_sysmsg(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+ ret void
+}
+
+; MSG_GS_ALLOC_REQ (9) DOES use m0 - should NOT be folded
+define void @test_sendmsg_gs_alloc_req(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_alloc_req(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 9, i32 [[VAL:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %val)
+ ret void
+}
+
+; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
+define void @test_sendmsghalt_interrupt(i32 %val) {
+; CHECK-LABEL: @test_sendmsghalt_interrupt(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %val)
+ ret void
+}
+
+; Test sendmsghalt - MSG_GS_ALLOC_REQ (9) DOES use m0 - should NOT be folded
+define void @test_sendmsghalt_gs_alloc_req(i32 %val) {
+; CHECK-LABEL: @test_sendmsghalt_gs_alloc_req(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsghalt(i32 9, i32 [[VAL:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsghalt(i32 9, i32 %val)
+ ret void
+}
+
+; m0 already poison - should be a no-op
+define void @test_sendmsg_already_poison() {
+; CHECK-LABEL: @test_sendmsg_already_poison(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+ ret void
+}
+
+; Test other message types that don't use m0
+; MSG_SAVEWAVE (4)
+define void @test_sendmsg_savewave(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_savewave(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
+ ret void
+}
+
+; MSG_STALL_WAVE_GEN (5)
+define void @test_sendmsg_stall_wave_gen(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+ ret void
+}
+
+; MSG_HALT_WAVES (6)
+define void @test_sendmsg_halt_waves(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_halt_waves(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+ ret void
+}
+
+declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
+declare void @llvm.amdgcn.s.sendmsghalt(i32 immarg, i32)
>From 85d6cda69400ff3f4db17bc184e31c100a82adab Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:06:04 +0530
Subject: [PATCH 03/18] Update AMDGPUInstCombineIntrinsic.cpp
---
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index bc9ce55b34b22..875ae0ec3665c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1194,7 +1194,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case KnownIEEEMode::Unknown:
break;
}
-}
+ }
if (V) {
if (auto *CI = dyn_cast<CallInst>(V)) {
>From fd526042f649f57a9aa99133af92dbe062eb7d2b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:46:48 +0530
Subject: [PATCH 04/18] Update AMDGPUInstCombineIntrinsic.cpp
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 44 +++++++++++++++----
1 file changed, 36 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 875ae0ec3665c..e4d93d0ebee91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1451,7 +1451,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// The second operand is copied to m0, but is only actually used for
- // GS_ALLOC_REQ. For other message types, fold it to poison.
+ // GS_ALLOC_REQ. For other message types that are known to not use m0,
+ // fold it to poison.
using namespace AMDGPU::SendMsg;
Value *M0Val = II.getArgOperand(1);
@@ -1461,16 +1462,43 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Msg = MsgImm->getZExtValue();
// Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
- // the lower 8 bits. Since ID_GS_ALLOC_REQ is 9, we need to check the
- // appropriate mask. For simplicity, extract the lower 8 bits which covers
- // both cases.
- uint64_t MsgId = Msg & ID_MASK_GFX11Plus_;
+ // the lower 8 bits. Use 4-bit mask for extracting base message ID since
+ // all message types we handle fit in 4 bits, and the upper bits encode
+ // stream ID or other parameters for some message types (e.g., MSG_GS).
+ uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
+
+ // Explicitly list message types that are known to not use m0.
+ // This is safer than excluding only GS_ALLOC_REQ, in case new message
+ // types are added in the future that do use m0.
+ bool M0Unused;
+ switch (MsgId) {
+ case ID_INTERRUPT:
+ case ID_GS_PreGFX11:
+ case ID_GS_DONE_PreGFX11:
+ // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
+ // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+ case ID_SAVEWAVE:
+ case ID_STALL_WAVE_GEN:
+ case ID_HALT_WAVES:
+ case ID_ORDERED_PS_DONE:
+ case ID_EARLY_PRIM_DEALLOC:
+ case ID_GET_DOORBELL:
+ case ID_GET_DDID:
+ case ID_SYSMSG:
+ M0Unused = true;
+ break;
+ default:
+ M0Unused = false;
+ break;
+ }
- // Only GS_ALLOC_REQ uses the m0 value.
- if (MsgId == ID_GS_ALLOC_REQ)
+ if (!M0Unused)
break;
- return IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+ // Drop noundef attribute since we're replacing with poison.
+ II.removeParamAttr(1, Attribute::NoUndef);
+ IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+ return nullptr;
}
case Intrinsic::amdgcn_update_dpp: {
Value *Old = II.getArgOperand(0);
>From 42c6ffa63a6c79dc76bbb82c43303e04ebf0e297 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:48:29 +0530
Subject: [PATCH 05/18] Update sendmsg-m0-poison.ll
---
.../InstCombine/AMDGPU/sendmsg-m0-poison.ll | 120 +++++++++++++-----
1 file changed, 91 insertions(+), 29 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index 8a7a2a7b5253e..3753ee259769b 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
; Test that the m0 operand is folded to poison for message types that don't use it.
@@ -13,7 +15,7 @@ define void @test_sendmsg_interrupt(i32 %val) {
ret void
}
-; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0 (pre-GFX11)
define void @test_sendmsg_gs_emit(i32 %val) {
; CHECK-LABEL: @test_sendmsg_gs_emit(
; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
@@ -23,7 +25,8 @@ define void @test_sendmsg_gs_emit(i32 %val) {
ret void
}
-; MSG_GS_DONE (3) doesn't use m0
+; MSG_GS_DONE (3) doesn't use m0 (pre-GFX11)
+; On GFX11+ this is ID_DEALLOC_VGPRS which also doesn't use m0
define void @test_sendmsg_gs_done(i32 %val) {
; CHECK-LABEL: @test_sendmsg_gs_done(
; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
@@ -33,13 +36,53 @@ define void @test_sendmsg_gs_done(i32 %val) {
ret void
}
-; MSG_SYSMSG (15) doesn't use m0
-define void @test_sendmsg_sysmsg(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_sysmsg(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; MSG_SAVEWAVE (4) doesn't use m0 (GFX8-GFX10)
+define void @test_sendmsg_savewave(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_savewave(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
; CHECK-NEXT: ret void
;
- call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+ call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
+ ret void
+}
+
+; MSG_STALL_WAVE_GEN (5) doesn't use m0 (GFX9-GFX11)
+define void @test_sendmsg_stall_wave_gen(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+ ret void
+}
+
+; MSG_HALT_WAVES (6) doesn't use m0 (GFX9-GFX11)
+define void @test_sendmsg_halt_waves(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_halt_waves(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+ ret void
+}
+
+; MSG_ORDERED_PS_DONE (7) doesn't use m0 (GFX9-GFX10)
+define void @test_sendmsg_ordered_ps_done(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_ordered_ps_done(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 7, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 7, i32 %val)
+ ret void
+}
+
+; MSG_EARLY_PRIM_DEALLOC (8) doesn't use m0 (GFX9 only)
+define void @test_sendmsg_early_prim_dealloc(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_early_prim_dealloc(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 8, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 8, i32 %val)
ret void
}
@@ -53,6 +96,36 @@ define void @test_sendmsg_gs_alloc_req(i32 %val) {
ret void
}
+; MSG_GET_DOORBELL (10) doesn't use m0 (GFX9-GFX10)
+define void @test_sendmsg_get_doorbell(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_get_doorbell(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 10, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 10, i32 %val)
+ ret void
+}
+
+; MSG_GET_DDID (11) doesn't use m0 (GFX10 only)
+define void @test_sendmsg_get_ddid(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_get_ddid(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 11, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 11, i32 %val)
+ ret void
+}
+
+; MSG_SYSMSG (15) doesn't use m0
+define void @test_sendmsg_sysmsg(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_sysmsg(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+ ret void
+}
+
; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
define void @test_sendmsghalt_interrupt(i32 %val) {
; CHECK-LABEL: @test_sendmsghalt_interrupt(
@@ -83,34 +156,23 @@ define void @test_sendmsg_already_poison() {
ret void
}
-; Test other message types that don't use m0
-; MSG_SAVEWAVE (4)
-define void @test_sendmsg_savewave(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_savewave(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
-; CHECK-NEXT: ret void
-;
- call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
- ret void
-}
-
-; MSG_STALL_WAVE_GEN (5)
-define void @test_sendmsg_stall_wave_gen(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; Test that noundef attribute is dropped when folding to poison
+define void @test_sendmsg_noundef(i32 noundef %val) {
+; CHECK-LABEL: @test_sendmsg_noundef(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
; CHECK-NEXT: ret void
;
- call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+ call void @llvm.amdgcn.s.sendmsg(i32 1, i32 noundef %val)
ret void
}
-; MSG_HALT_WAVES (6)
-define void @test_sendmsg_halt_waves(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_halt_waves(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; Test unknown message ID - should NOT be folded (future-proofing)
+define void @test_sendmsg_unknown_id(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_unknown_id(
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 14, i32 [[VAL:%.*]])
; CHECK-NEXT: ret void
;
- call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+ call void @llvm.amdgcn.s.sendmsg(i32 14, i32 %val)
ret void
}
>From 940e2aa5d696179a446bfd721b495a6ae05e4a86 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:11:19 +0530
Subject: [PATCH 06/18] Update AMDGPUInstCombineIntrinsic.cpp
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 29 ++-----------------
1 file changed, 2 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index e4d93d0ebee91..42eb1db2736df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1451,7 +1451,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// The second operand is copied to m0, but is only actually used for
- // GS_ALLOC_REQ. For other message types that are known to not use m0,
+ // certain message types. For message types that are known to not use m0,
// fold it to poison.
using namespace AMDGPU::SendMsg;
@@ -1467,32 +1467,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// stream ID or other parameters for some message types (e.g., MSG_GS).
uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
- // Explicitly list message types that are known to not use m0.
- // This is safer than excluding only GS_ALLOC_REQ, in case new message
- // types are added in the future that do use m0.
- bool M0Unused;
- switch (MsgId) {
- case ID_INTERRUPT:
- case ID_GS_PreGFX11:
- case ID_GS_DONE_PreGFX11:
- // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
- // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
- case ID_SAVEWAVE:
- case ID_STALL_WAVE_GEN:
- case ID_HALT_WAVES:
- case ID_ORDERED_PS_DONE:
- case ID_EARLY_PRIM_DEALLOC:
- case ID_GET_DOORBELL:
- case ID_GET_DDID:
- case ID_SYSMSG:
- M0Unused = true;
- break;
- default:
- M0Unused = false;
- break;
- }
-
- if (!M0Unused)
+ if (!msgDoesNotUseM0(MsgId))
break;
// Drop noundef attribute since we're replacing with poison.
>From c0d976b07c0912a18fe2af7dd3b51fb64f30dd97 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:17:35 +0530
Subject: [PATCH 07/18] Update AMDGPUBaseInfo.cpp
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 25 +++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c1337f27a0f70..bb1b6c95c46dc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2484,6 +2484,31 @@ uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
+bool msgDoesNotUseM0(int64_t MsgId) {
+ // Explicitly list message types that are known to not use m0.
+ // This is safer than excluding only GS_ALLOC_REQ, in case new message
+ // types are added in the future that do use m0.
+ switch (MsgId) {
+ case ID_INTERRUPT:
+ case ID_GS_PreGFX11:
+ case ID_GS_DONE_PreGFX11:
+ // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
+ // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+ case ID_SAVEWAVE:
+ case ID_STALL_WAVE_GEN:
+ case ID_HALT_WAVES:
+ case ID_ORDERED_PS_DONE:
+ case ID_EARLY_PRIM_DEALLOC:
+ case ID_GET_DOORBELL:
+ case ID_GET_DDID:
+ case ID_SYSMSG:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
} // namespace SendMsg
//===----------------------------------------------------------------------===//
>From 61c5438af8d808a4b02258baf7e62c2879a38773 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:20:56 +0530
Subject: [PATCH 08/18] Update AMDGPUBaseInfo.h
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index eee9e96934d49..94d82672b6a22 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1562,6 +1562,10 @@ void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
+/// \returns true if the message does not use the m0 operand.
+LLVM_READNONE
+bool msgDoesNotUseM0(int64_t MsgId);
+
} // namespace SendMsg
unsigned getInitialPSInputAddr(const Function &F);
>From d5df0fd1e92c7ea942fc944152fd622eb97570e8 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:22:39 +0530
Subject: [PATCH 09/18] Update sendmsg-m0-poison.ll
---
.../test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index 3753ee259769b..d489036555266 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s
; Test that the m0 operand is folded to poison for message types that don't use it.
>From 3dedbbf8a506fe7dd879a80969d25d91a1fcf4fe Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:23:57 +0530
Subject: [PATCH 10/18] Update AMDGPUBaseInfo.cpp
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index bb1b6c95c46dc..7badf937cd0d0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2508,7 +2508,6 @@ bool msgDoesNotUseM0(int64_t MsgId) {
}
}
-
} // namespace SendMsg
//===----------------------------------------------------------------------===//
>From 1b9c2cf0741f0fa591ee1eeca44e17b8d184b3ed Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:26:02 +0530
Subject: [PATCH 11/18] Update AMDGPUBaseInfo.h
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 94d82672b6a22..b15b9e8e95332 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1562,7 +1562,7 @@ void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
-/// \returns true if the message does not use the m0 operand.
+/// Returns true if the message does not use the m0 operand.
LLVM_READNONE
bool msgDoesNotUseM0(int64_t MsgId);
>From 431a4b56fca3b275acf44bd32ba9756cefff0534 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:49:12 +0530
Subject: [PATCH 12/18] Update AMDGPUBaseInfo.cpp
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 7badf937cd0d0..4965a3d09c2c5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2484,16 +2484,19 @@ uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
-bool msgDoesNotUseM0(int64_t MsgId) {
+bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
// Explicitly list message types that are known to not use m0.
// This is safer than excluding only GS_ALLOC_REQ, in case new message
// types are added in the future that do use m0.
switch (MsgId) {
- case ID_INTERRUPT:
- case ID_GS_PreGFX11:
- case ID_GS_DONE_PreGFX11:
- // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
- // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+ case ID_HS_TESSFACTOR_GFX11Plus:
+ // ID_GS_PreGFX11 has the same value as ID_HS_TESSFACTOR_GFX11Plus.
+ // GS uses m0, but HS_TESSFACTOR does not.
+ return isGFX11Plus(STI);
+ case ID_DEALLOC_VGPRS_GFX11Plus:
+ // ID_GS_DONE_PreGFX11 has the same value as ID_DEALLOC_VGPRS_GFX11Plus.
+ // GS_DONE uses m0, but DEALLOC_VGPRS does not.
+ return isGFX11Plus(STI);
case ID_SAVEWAVE:
case ID_STALL_WAVE_GEN:
case ID_HALT_WAVES:
>From f58d28376bb69656c630bbccff378b906a689e4a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:51:25 +0530
Subject: [PATCH 13/18] Update AMDGPUInstCombineIntrinsic.cpp
---
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 42eb1db2736df..a6ca213313058 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1467,7 +1467,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// stream ID or other parameters for some message types (e.g., MSG_GS).
uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
- if (!msgDoesNotUseM0(MsgId))
+ if (!msgDoesNotUseM0(MsgId, *ST))
break;
// Drop noundef attribute since we're replacing with poison.
>From 38e1f13d7a2e23ea4e3b6971ee380828ee1a3423 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 12:04:34 +0530
Subject: [PATCH 14/18] Update AMDGPUBaseInfo.h
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index b15b9e8e95332..e8ecbe89f0a8b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1563,8 +1563,7 @@ LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
/// Returns true if the message does not use the m0 operand.
-LLVM_READNONE
-bool msgDoesNotUseM0(int64_t MsgId);
+bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);
} // namespace SendMsg
>From ff821ff92f854834fef67d2d1463229d2af62c85 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 12:07:59 +0530
Subject: [PATCH 15/18] Update sendmsg-m0-poison.ll
---
.../InstCombine/AMDGPU/sendmsg-m0-poison.ll | 90 +++++++++++++++----
1 file changed, 72 insertions(+), 18 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index d489036555266..e54d1ab89d99f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,36 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
; Test that the m0 operand is folded to poison for message types that don't use it.
-; MSG_INTERRUPT (1) doesn't use m0
+; MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
define void @test_sendmsg_interrupt(i32 %val) {
; CHECK-LABEL: @test_sendmsg_interrupt(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 [[VAL:%.*]])
; CHECK-NEXT: ret void
;
call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %val)
ret void
}
-; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0 (pre-GFX11)
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) DOES use m0 (pre-GFX11) - should NOT be folded
+; On GFX11+ this is MSG_HS_TESSFACTOR which doesn't use m0
define void @test_sendmsg_gs_emit(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_gs_emit(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
-; CHECK-NEXT: ret void
+; GFX9-LABEL: @test_sendmsg_gs_emit(
+; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: @test_sendmsg_gs_emit(
+; GFX10-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: @test_sendmsg_gs_emit(
+; GFX11-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
+; GFX11-NEXT: ret void
;
call void @llvm.amdgcn.s.sendmsg(i32 34, i32 %val)
ret void
}
-; MSG_GS_DONE (3) doesn't use m0 (pre-GFX11)
-; On GFX11+ this is ID_DEALLOC_VGPRS which also doesn't use m0
+; MSG_GS_DONE (3) DOES use m0 (pre-GFX11) - should NOT be folded
+; On GFX11+ this is ID_DEALLOC_VGPRS which doesn't use m0
define void @test_sendmsg_gs_done(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_gs_done(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
-; CHECK-NEXT: ret void
+; GFX9-LABEL: @test_sendmsg_gs_done(
+; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: @test_sendmsg_gs_done(
+; GFX10-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: @test_sendmsg_gs_done(
+; GFX11-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; GFX11-NEXT: ret void
;
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
ret void
@@ -126,10 +143,10 @@ define void @test_sendmsg_sysmsg(i32 %val) {
ret void
}
-; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
+; Test sendmsghalt as well - MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
define void @test_sendmsghalt_interrupt(i32 %val) {
; CHECK-LABEL: @test_sendmsghalt_interrupt(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 poison)
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 [[VAL:%.*]])
; CHECK-NEXT: ret void
;
call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %val)
@@ -157,12 +174,13 @@ define void @test_sendmsg_already_poison() {
}
; Test that noundef attribute is dropped when folding to poison
+; Using MSG_SAVEWAVE (4) which doesn't use m0
define void @test_sendmsg_noundef(i32 noundef %val) {
; CHECK-LABEL: @test_sendmsg_noundef(
-; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
; CHECK-NEXT: ret void
;
- call void @llvm.amdgcn.s.sendmsg(i32 1, i32 noundef %val)
+ call void @llvm.amdgcn.s.sendmsg(i32 4, i32 noundef %val)
ret void
}
@@ -176,5 +194,41 @@ define void @test_sendmsg_unknown_id(i32 %val) {
ret void
}
+; Test MSG_HS_TESSFACTOR (2) on GFX11+ - doesn't use m0
+define void @test_sendmsg_hs_tessfactor(i32 %val) {
+; GFX9-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX10-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX11-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 poison)
+; GFX11-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 2, i32 %val)
+ ret void
+}
+
+; Test MSG_DEALLOC_VGPRS (3) on GFX11+ - doesn't use m0
+define void @test_sendmsg_dealloc_vgprs(i32 %val) {
+; GFX9-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX10-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX11-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; GFX11-NEXT: ret void
+;
+ call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
+ ret void
+}
+
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
declare void @llvm.amdgcn.s.sendmsghalt(i32 immarg, i32)
>From bbf21a4ea15e58e5ac032b8232efcb0caa7d025b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:44:56 +0530
Subject: [PATCH 16/18] Update sendmsg-m0-poison.ll
>From aeef3063adbfa12e174d439f805549985a450103 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:48:55 +0530
Subject: [PATCH 17/18] Update sendmsg-m0-poison.ll
---
.../InstCombine/AMDGPU/sendmsg-m0-poison.ll | 22 +++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index e54d1ab89d99f..c1f99d146232d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
; Test that the m0 operand is folded to poison for message types that don't use it.
+; For the default target (no mcpu), conflicting encodings (IDs 2, 3) should NOT be folded.
; MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
define void @test_sendmsg_interrupt(i32 %val) {
@@ -17,7 +19,12 @@ define void @test_sendmsg_interrupt(i32 %val) {
; MSG_GS (2) + GS_OP_EMIT (2 << 4) DOES use m0 (pre-GFX11) - should NOT be folded
; On GFX11+ this is MSG_HS_TESSFACTOR which doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding)
define void @test_sendmsg_gs_emit(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_gs_emit(
+; DEFAULT-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; DEFAULT-NEXT: ret void
+;
; GFX9-LABEL: @test_sendmsg_gs_emit(
; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
; GFX9-NEXT: ret void
@@ -36,7 +43,12 @@ define void @test_sendmsg_gs_emit(i32 %val) {
; MSG_GS_DONE (3) DOES use m0 (pre-GFX11) - should NOT be folded
; On GFX11+ this is ID_DEALLOC_VGPRS which doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding)
define void @test_sendmsg_gs_done(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_gs_done(
+; DEFAULT-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; DEFAULT-NEXT: ret void
+;
; GFX9-LABEL: @test_sendmsg_gs_done(
; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
; GFX9-NEXT: ret void
@@ -195,7 +207,12 @@ define void @test_sendmsg_unknown_id(i32 %val) {
}
; Test MSG_HS_TESSFACTOR (2) on GFX11+ - doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding with MSG_GS)
define void @test_sendmsg_hs_tessfactor(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_hs_tessfactor(
+; DEFAULT-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; DEFAULT-NEXT: ret void
+;
; GFX9-LABEL: @test_sendmsg_hs_tessfactor(
; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
; GFX9-NEXT: ret void
@@ -213,7 +230,12 @@ define void @test_sendmsg_hs_tessfactor(i32 %val) {
}
; Test MSG_DEALLOC_VGPRS (3) on GFX11+ - doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding with MSG_GS_DONE)
define void @test_sendmsg_dealloc_vgprs(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_dealloc_vgprs(
+; DEFAULT-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; DEFAULT-NEXT: ret void
+;
; GFX9-LABEL: @test_sendmsg_dealloc_vgprs(
; GFX9-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
; GFX9-NEXT: ret void
>From f91c02a80fb52ac7d6dc78ac92ebbdc918574597 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:56:49 +0530
Subject: [PATCH 18/18] Update AMDGPUInstCombineIntrinsic.cpp
---
.../lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index a6ca213313058..3278d33c7ef12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1460,18 +1460,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
- uint64_t Msg = MsgImm->getZExtValue();
- // Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
- // the lower 8 bits. Use 4-bit mask for extracting base message ID since
- // all message types we handle fit in 4 bits, and the upper bits encode
- // stream ID or other parameters for some message types (e.g., MSG_GS).
- uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
+ uint16_t MsgId, OpId, StreamId;
+ decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
if (!msgDoesNotUseM0(MsgId, *ST))
break;
- // Drop noundef attribute since we're replacing with poison.
- II.removeParamAttr(1, Attribute::NoUndef);
+ // Drop UB-implying attributes since we're replacing with poison.
+ II.dropUBImplyingAttrsAndMetadata();
IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
return nullptr;
}
More information about the llvm-commits mailing list