[llvm] [AMDGPU][InstCombine] Fold unused m0 operand to poison for sendmsg intrinsics (PR #183755)

Tue Mar 3 01:27:07 PST 2026

https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/183755

>From 4b5cd452b85ff75d6bc6122309369f645d4d8619 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:02:11 +0530
Subject: [PATCH 01/18] Sendmsg fold m0 to poison

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 26 ++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 82de8cf169b48..bc9ce55b34b22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1194,7 +1194,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       case KnownIEEEMode::Unknown:
         break;
       }
-    }
+}
 
     if (V) {
       if (auto *CI = dyn_cast<CallInst>(V)) {
@@ -1448,6 +1448,30 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // amdgcn.kill(i1 1) is a no-op
     return IC.eraseInstFromFunction(II);
   }
+  case Intrinsic::amdgcn_s_sendmsg:
+  case Intrinsic::amdgcn_s_sendmsghalt: {
+    // The second operand is copied to m0, but is only actually used for
+    // GS_ALLOC_REQ. For other message types, fold it to poison.
+    using namespace AMDGPU::SendMsg;
+
+    Value *M0Val = II.getArgOperand(1);
+    if (isa<PoisonValue>(M0Val))
+      break;
+
+    auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
+    uint64_t Msg = MsgImm->getZExtValue();
+    // Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
+    // the lower 8 bits. Since ID_GS_ALLOC_REQ is 9, we need to check the
+    // appropriate mask. For simplicity, extract the lower 8 bits which covers
+    // both cases.
+    uint64_t MsgId = Msg & ID_MASK_GFX11Plus_;
+
+    // Only GS_ALLOC_REQ uses the m0 value.
+    if (MsgId == ID_GS_ALLOC_REQ)
+      break;
+
+    return IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+  }
   case Intrinsic::amdgcn_update_dpp: {
     Value *Old = II.getArgOperand(0);
 

>From 323b1be63d7545198a462d09e944a24a2577639b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:04:52 +0530
Subject: [PATCH 02/18] adds tests sendmsg-m0-poison.ll

---
 .../InstCombine/AMDGPU/sendmsg-m0-poison.ll   | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
new file mode 100644
index 0000000000000..8a7a2a7b5253e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S | FileCheck %s
+
+; Test that the m0 operand is folded to poison for message types that don't use it.
+
+; MSG_INTERRUPT (1) doesn't use m0
+define void @test_sendmsg_interrupt(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_interrupt(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %val)
+  ret void
+}
+
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0
+define void @test_sendmsg_gs_emit(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_emit(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 34, i32 %val)
+  ret void
+}
+
+; MSG_GS_DONE (3) doesn't use m0
+define void @test_sendmsg_gs_done(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_done(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
+  ret void
+}
+
+; MSG_SYSMSG (15) doesn't use m0
+define void @test_sendmsg_sysmsg(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_sysmsg(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+  ret void
+}
+
+; MSG_GS_ALLOC_REQ (9) DOES use m0 - should NOT be folded
+define void @test_sendmsg_gs_alloc_req(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_gs_alloc_req(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 9, i32 [[VAL:%.*]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %val)
+  ret void
+}
+
+; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
+define void @test_sendmsghalt_interrupt(i32 %val) {
+; CHECK-LABEL: @test_sendmsghalt_interrupt(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %val)
+  ret void
+}
+
+; Test sendmsghalt - MSG_GS_ALLOC_REQ (9) DOES use m0 - should NOT be folded
+define void @test_sendmsghalt_gs_alloc_req(i32 %val) {
+; CHECK-LABEL: @test_sendmsghalt_gs_alloc_req(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsghalt(i32 9, i32 [[VAL:%.*]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsghalt(i32 9, i32 %val)
+  ret void
+}
+
+; m0 already poison - should be a no-op
+define void @test_sendmsg_already_poison() {
+; CHECK-LABEL: @test_sendmsg_already_poison(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+  ret void
+}
+
+; Test other message types that don't use m0
+; MSG_SAVEWAVE (4)
+define void @test_sendmsg_savewave(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_savewave(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
+  ret void
+}
+
+; MSG_STALL_WAVE_GEN (5)
+define void @test_sendmsg_stall_wave_gen(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+  ret void
+}
+
+; MSG_HALT_WAVES (6)
+define void @test_sendmsg_halt_waves(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_halt_waves(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
+declare void @llvm.amdgcn.s.sendmsghalt(i32 immarg, i32)

>From 85d6cda69400ff3f4db17bc184e31c100a82adab Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:06:04 +0530
Subject: [PATCH 03/18] Update AMDGPUInstCombineIntrinsic.cpp

---
 llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index bc9ce55b34b22..875ae0ec3665c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1194,7 +1194,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       case KnownIEEEMode::Unknown:
         break;
       }
-}
+    }
 
     if (V) {
       if (auto *CI = dyn_cast<CallInst>(V)) {

>From fd526042f649f57a9aa99133af92dbe062eb7d2b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:46:48 +0530
Subject: [PATCH 04/18] Update AMDGPUInstCombineIntrinsic.cpp

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 44 +++++++++++++++----
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 875ae0ec3665c..e4d93d0ebee91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1451,7 +1451,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   case Intrinsic::amdgcn_s_sendmsg:
   case Intrinsic::amdgcn_s_sendmsghalt: {
     // The second operand is copied to m0, but is only actually used for
-    // GS_ALLOC_REQ. For other message types, fold it to poison.
+    // GS_ALLOC_REQ. For other message types that are known to not use m0,
+    // fold it to poison.
     using namespace AMDGPU::SendMsg;
 
     Value *M0Val = II.getArgOperand(1);
@@ -1461,16 +1462,43 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
     uint64_t Msg = MsgImm->getZExtValue();
     // Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
-    // the lower 8 bits. Since ID_GS_ALLOC_REQ is 9, we need to check the
-    // appropriate mask. For simplicity, extract the lower 8 bits which covers
-    // both cases.
-    uint64_t MsgId = Msg & ID_MASK_GFX11Plus_;
+    // the lower 8 bits. Use 4-bit mask for extracting base message ID since
+    // all message types we handle fit in 4 bits, and the upper bits encode
+    // stream ID or other parameters for some message types (e.g., MSG_GS).
+    uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
+
+    // Explicitly list message types that are known to not use m0.
+    // This is safer than excluding only GS_ALLOC_REQ, in case new message
+    // types are added in the future that do use m0.
+    bool M0Unused;
+    switch (MsgId) {
+    case ID_INTERRUPT:
+    case ID_GS_PreGFX11:
+    case ID_GS_DONE_PreGFX11:
+    // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
+    // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+    case ID_SAVEWAVE:
+    case ID_STALL_WAVE_GEN:
+    case ID_HALT_WAVES:
+    case ID_ORDERED_PS_DONE:
+    case ID_EARLY_PRIM_DEALLOC:
+    case ID_GET_DOORBELL:
+    case ID_GET_DDID:
+    case ID_SYSMSG:
+      M0Unused = true;
+      break;
+    default:
+      M0Unused = false;
+      break;
+    }
 
-    // Only GS_ALLOC_REQ uses the m0 value.
-    if (MsgId == ID_GS_ALLOC_REQ)
+    if (!M0Unused)
       break;
 
-    return IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+    // Drop noundef attribute since we're replacing with poison.
+    II.removeParamAttr(1, Attribute::NoUndef);
+    IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
+    return nullptr;
   }
   case Intrinsic::amdgcn_update_dpp: {
     Value *Old = II.getArgOperand(0);

>From 42c6ffa63a6c79dc76bbb82c43303e04ebf0e297 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:48:29 +0530
Subject: [PATCH 05/18] Update sendmsg-m0-poison.ll

---
 .../InstCombine/AMDGPU/sendmsg-m0-poison.ll   | 120 +++++++++++++-----
 1 file changed, 91 insertions(+), 29 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index 8a7a2a7b5253e..3753ee259769b 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
 
 ; Test that the m0 operand is folded to poison for message types that don't use it.
 
@@ -13,7 +15,7 @@ define void @test_sendmsg_interrupt(i32 %val) {
   ret void
 }
 
-; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0 (pre-GFX11)
 define void @test_sendmsg_gs_emit(i32 %val) {
 ; CHECK-LABEL: @test_sendmsg_gs_emit(
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
@@ -23,7 +25,8 @@ define void @test_sendmsg_gs_emit(i32 %val) {
   ret void
 }
 
-; MSG_GS_DONE (3) doesn't use m0
+; MSG_GS_DONE (3) doesn't use m0 (pre-GFX11)
+; On GFX11+ this is ID_DEALLOC_VGPRS which also doesn't use m0
 define void @test_sendmsg_gs_done(i32 %val) {
 ; CHECK-LABEL: @test_sendmsg_gs_done(
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
@@ -33,13 +36,53 @@ define void @test_sendmsg_gs_done(i32 %val) {
   ret void
 }
 
-; MSG_SYSMSG (15) doesn't use m0
-define void @test_sendmsg_sysmsg(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_sysmsg(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; MSG_SAVEWAVE (4) doesn't use m0 (GFX8-GFX10)
+define void @test_sendmsg_savewave(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_savewave(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+  call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
+  ret void
+}
+
+; MSG_STALL_WAVE_GEN (5) doesn't use m0 (GFX9-GFX11)
+define void @test_sendmsg_stall_wave_gen(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+  ret void
+}
+
+; MSG_HALT_WAVES (6) doesn't use m0 (GFX9-GFX11)
+define void @test_sendmsg_halt_waves(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_halt_waves(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+  ret void
+}
+
+; MSG_ORDERED_PS_DONE (7) doesn't use m0 (GFX9-GFX10)
+define void @test_sendmsg_ordered_ps_done(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_ordered_ps_done(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 7, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 7, i32 %val)
+  ret void
+}
+
+; MSG_EARLY_PRIM_DEALLOC (8) doesn't use m0 (GFX9 only)
+define void @test_sendmsg_early_prim_dealloc(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_early_prim_dealloc(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 8, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 8, i32 %val)
   ret void
 }
 
@@ -53,6 +96,36 @@ define void @test_sendmsg_gs_alloc_req(i32 %val) {
   ret void
 }
 
+; MSG_GET_DOORBELL (10) doesn't use m0 (GFX9-GFX10)
+define void @test_sendmsg_get_doorbell(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_get_doorbell(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 10, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 10, i32 %val)
+  ret void
+}
+
+; MSG_GET_DDID (11) doesn't use m0 (GFX10 only)
+define void @test_sendmsg_get_ddid(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_get_ddid(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 11, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 11, i32 %val)
+  ret void
+}
+
+; MSG_SYSMSG (15) doesn't use m0
+define void @test_sendmsg_sysmsg(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_sysmsg(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 15, i32 poison)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 15, i32 %val)
+  ret void
+}
+
 ; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
 define void @test_sendmsghalt_interrupt(i32 %val) {
 ; CHECK-LABEL: @test_sendmsghalt_interrupt(
@@ -83,34 +156,23 @@ define void @test_sendmsg_already_poison() {
   ret void
 }
 
-; Test other message types that don't use m0
-; MSG_SAVEWAVE (4)
-define void @test_sendmsg_savewave(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_savewave(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.amdgcn.s.sendmsg(i32 4, i32 %val)
-  ret void
-}
-
-; MSG_STALL_WAVE_GEN (5)
-define void @test_sendmsg_stall_wave_gen(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_stall_wave_gen(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 5, i32 poison)
+; Test that noundef attribute is dropped when folding to poison
+define void @test_sendmsg_noundef(i32 noundef %val) {
+; CHECK-LABEL: @test_sendmsg_noundef(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.amdgcn.s.sendmsg(i32 5, i32 %val)
+  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 noundef %val)
   ret void
 }
 
-; MSG_HALT_WAVES (6)
-define void @test_sendmsg_halt_waves(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_halt_waves(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 6, i32 poison)
+; Test unknown message ID - should NOT be folded (future-proofing)
+define void @test_sendmsg_unknown_id(i32 %val) {
+; CHECK-LABEL: @test_sendmsg_unknown_id(
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 14, i32 [[VAL:%.*]])
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.amdgcn.s.sendmsg(i32 6, i32 %val)
+  call void @llvm.amdgcn.s.sendmsg(i32 14, i32 %val)
   ret void
 }
 

>From 940e2aa5d696179a446bfd721b495a6ae05e4a86 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:11:19 +0530
Subject: [PATCH 06/18] Update AMDGPUInstCombineIntrinsic.cpp

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 29 ++-----------------
 1 file changed, 2 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index e4d93d0ebee91..42eb1db2736df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1451,7 +1451,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   case Intrinsic::amdgcn_s_sendmsg:
   case Intrinsic::amdgcn_s_sendmsghalt: {
     // The second operand is copied to m0, but is only actually used for
-    // GS_ALLOC_REQ. For other message types that are known to not use m0,
+    // certain message types. For message types that are known to not use m0,
     // fold it to poison.
     using namespace AMDGPU::SendMsg;
 
@@ -1467,32 +1467,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // stream ID or other parameters for some message types (e.g., MSG_GS).
     uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
 
-    // Explicitly list message types that are known to not use m0.
-    // This is safer than excluding only GS_ALLOC_REQ, in case new message
-    // types are added in the future that do use m0.
-    bool M0Unused;
-    switch (MsgId) {
-    case ID_INTERRUPT:
-    case ID_GS_PreGFX11:
-    case ID_GS_DONE_PreGFX11:
-    // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
-    // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
-    case ID_SAVEWAVE:
-    case ID_STALL_WAVE_GEN:
-    case ID_HALT_WAVES:
-    case ID_ORDERED_PS_DONE:
-    case ID_EARLY_PRIM_DEALLOC:
-    case ID_GET_DOORBELL:
-    case ID_GET_DDID:
-    case ID_SYSMSG:
-      M0Unused = true;
-      break;
-    default:
-      M0Unused = false;
-      break;
-    }
-
-    if (!M0Unused)
+    if (!msgDoesNotUseM0(MsgId))
       break;
 
     // Drop noundef attribute since we're replacing with poison.

>From c0d976b07c0912a18fe2af7dd3b51fb64f30dd97 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:17:35 +0530
Subject: [PATCH 07/18] Update AMDGPUBaseInfo.cpp

---
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c1337f27a0f70..bb1b6c95c46dc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2484,6 +2484,31 @@ uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
 }
 
+bool msgDoesNotUseM0(int64_t MsgId) {
+  // Explicitly list message types that are known to not use m0.
+  // This is safer than excluding only GS_ALLOC_REQ, in case new message
+  // types are added in the future that do use m0.
+  switch (MsgId) {
+  case ID_INTERRUPT:
+  case ID_GS_PreGFX11:
+  case ID_GS_DONE_PreGFX11:
+  // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
+  // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+  case ID_SAVEWAVE:
+  case ID_STALL_WAVE_GEN:
+  case ID_HALT_WAVES:
+  case ID_ORDERED_PS_DONE:
+  case ID_EARLY_PRIM_DEALLOC:
+  case ID_GET_DOORBELL:
+  case ID_GET_DDID:
+  case ID_SYSMSG:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
 } // namespace SendMsg
 
 //===----------------------------------------------------------------------===//

>From 61c5438af8d808a4b02258baf7e62c2879a38773 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:20:56 +0530
Subject: [PATCH 08/18] Update AMDGPUBaseInfo.h

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index eee9e96934d49..94d82672b6a22 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1562,6 +1562,10 @@ void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
 LLVM_READNONE
 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
 
+/// \returns true if the message does not use the m0 operand.
+LLVM_READNONE
+bool msgDoesNotUseM0(int64_t MsgId);
+
 } // namespace SendMsg
 
 unsigned getInitialPSInputAddr(const Function &F);

>From d5df0fd1e92c7ea942fc944152fd622eb97570e8 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:22:39 +0530
Subject: [PATCH 09/18] Update sendmsg-m0-poison.ll

---
 .../test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index 3753ee259769b..d489036555266 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s
 
 ; Test that the m0 operand is folded to poison for message types that don't use it.
 

>From 3dedbbf8a506fe7dd879a80969d25d91a1fcf4fe Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:23:57 +0530
Subject: [PATCH 10/18] Update AMDGPUBaseInfo.cpp

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index bb1b6c95c46dc..7badf937cd0d0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2508,7 +2508,6 @@ bool msgDoesNotUseM0(int64_t MsgId) {
   }
 }
 
-
 } // namespace SendMsg
 
 //===----------------------------------------------------------------------===//

>From 1b9c2cf0741f0fa591ee1eeca44e17b8d184b3ed Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 27 Feb 2026 23:26:02 +0530
Subject: [PATCH 11/18] Update AMDGPUBaseInfo.h

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 94d82672b6a22..b15b9e8e95332 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1562,7 +1562,7 @@ void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
 LLVM_READNONE
 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
 
-/// \returns true if the message does not use the m0 operand.
+/// Returns true if the message does not use the m0 operand.
 LLVM_READNONE
 bool msgDoesNotUseM0(int64_t MsgId);
 

>From 431a4b56fca3b275acf44bd32ba9756cefff0534 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:49:12 +0530
Subject: [PATCH 12/18] Update AMDGPUBaseInfo.cpp

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 7badf937cd0d0..4965a3d09c2c5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2484,16 +2484,19 @@ uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
 }
 
-bool msgDoesNotUseM0(int64_t MsgId) {
+bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
   // Explicitly list message types that are known to not use m0.
   // This is safer than excluding only GS_ALLOC_REQ, in case new message
   // types are added in the future that do use m0.
   switch (MsgId) {
-  case ID_INTERRUPT:
-  case ID_GS_PreGFX11:
-  case ID_GS_DONE_PreGFX11:
-  // ID_HS_TESSFACTOR_GFX11Plus and ID_DEALLOC_VGPRS_GFX11Plus have the
-  // same values as ID_GS_PreGFX11 and ID_GS_DONE_PreGFX11 respectively.
+  case ID_HS_TESSFACTOR_GFX11Plus:
+    // ID_GS_PreGFX11 has the same value as ID_HS_TESSFACTOR_GFX11Plus.
+    // GS uses m0, but HS_TESSFACTOR does not.
+    return isGFX11Plus(STI);
+  case ID_DEALLOC_VGPRS_GFX11Plus:
+    // ID_GS_DONE_PreGFX11 has the same value as ID_DEALLOC_VGPRS_GFX11Plus.
+    // GS_DONE uses m0, but DEALLOC_VGPRS does not.
+    return isGFX11Plus(STI);
   case ID_SAVEWAVE:
   case ID_STALL_WAVE_GEN:
   case ID_HALT_WAVES:

>From f58d28376bb69656c630bbccff378b906a689e4a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:51:25 +0530
Subject: [PATCH 13/18] Update AMDGPUInstCombineIntrinsic.cpp

---
 llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 42eb1db2736df..a6ca213313058 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1467,7 +1467,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // stream ID or other parameters for some message types (e.g., MSG_GS).
     uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
 
-    if (!msgDoesNotUseM0(MsgId))
+    if (!msgDoesNotUseM0(MsgId, *ST))
       break;
 
     // Drop noundef attribute since we're replacing with poison.

>From 38e1f13d7a2e23ea4e3b6971ee380828ee1a3423 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 12:04:34 +0530
Subject: [PATCH 14/18] Update AMDGPUBaseInfo.h

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index b15b9e8e95332..e8ecbe89f0a8b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1563,8 +1563,7 @@ LLVM_READNONE
 uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
 
 /// Returns true if the message does not use the m0 operand.
-LLVM_READNONE
-bool msgDoesNotUseM0(int64_t MsgId);
+bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);
 
 } // namespace SendMsg
 

>From ff821ff92f854834fef67d2d1463229d2af62c85 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 12:07:59 +0530
Subject: [PATCH 15/18] Update sendmsg-m0-poison.ll

---
 .../InstCombine/AMDGPU/sendmsg-m0-poison.ll   | 90 +++++++++++++++----
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index d489036555266..e54d1ab89d99f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,36 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
 
 ; Test that the m0 operand is folded to poison for message types that don't use it.
 
-; MSG_INTERRUPT (1) doesn't use m0
+; MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
 define void @test_sendmsg_interrupt(i32 %val) {
 ; CHECK-LABEL: @test_sendmsg_interrupt(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 [[VAL:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %val)
   ret void
 }
 
-; MSG_GS (2) + GS_OP_EMIT (2 << 4) doesn't use m0 (pre-GFX11)
+; MSG_GS (2) + GS_OP_EMIT (2 << 4) DOES use m0 (pre-GFX11) - should NOT be folded
+; On GFX11+ this is MSG_HS_TESSFACTOR which doesn't use m0
 define void @test_sendmsg_gs_emit(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_gs_emit(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
-; CHECK-NEXT:    ret void
+; GFX9-LABEL: @test_sendmsg_gs_emit(
+; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; GFX9-NEXT:    ret void
+;
+; GFX10-LABEL: @test_sendmsg_gs_emit(
+; GFX10-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; GFX10-NEXT:    ret void
+;
+; GFX11-LABEL: @test_sendmsg_gs_emit(
+; GFX11-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 poison)
+; GFX11-NEXT:    ret void
 ;
   call void @llvm.amdgcn.s.sendmsg(i32 34, i32 %val)
   ret void
 }
 
-; MSG_GS_DONE (3) doesn't use m0 (pre-GFX11)
-; On GFX11+ this is ID_DEALLOC_VGPRS which also doesn't use m0
+; MSG_GS_DONE (3) DOES use m0 (pre-GFX11) - should NOT be folded
+; On GFX11+ this is ID_DEALLOC_VGPRS which doesn't use m0
 define void @test_sendmsg_gs_done(i32 %val) {
-; CHECK-LABEL: @test_sendmsg_gs_done(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
-; CHECK-NEXT:    ret void
+; GFX9-LABEL: @test_sendmsg_gs_done(
+; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX9-NEXT:    ret void
+;
+; GFX10-LABEL: @test_sendmsg_gs_done(
+; GFX10-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX10-NEXT:    ret void
+;
+; GFX11-LABEL: @test_sendmsg_gs_done(
+; GFX11-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; GFX11-NEXT:    ret void
 ;
   call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
   ret void
@@ -126,10 +143,10 @@ define void @test_sendmsg_sysmsg(i32 %val) {
   ret void
 }
 
-; Test sendmsghalt as well - MSG_INTERRUPT (1) doesn't use m0
+; Test sendmsghalt as well - MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
 define void @test_sendmsghalt_interrupt(i32 %val) {
 ; CHECK-LABEL: @test_sendmsghalt_interrupt(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 poison)
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 [[VAL:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %val)
@@ -157,12 +174,13 @@ define void @test_sendmsg_already_poison() {
 }
 
 ; Test that noundef attribute is dropped when folding to poison
+; Using MSG_SAVEWAVE (4) which doesn't use m0
 define void @test_sendmsg_noundef(i32 noundef %val) {
 ; CHECK-LABEL: @test_sendmsg_noundef(
-; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 1, i32 poison)
+; CHECK-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 4, i32 poison)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.amdgcn.s.sendmsg(i32 1, i32 noundef %val)
+  call void @llvm.amdgcn.s.sendmsg(i32 4, i32 noundef %val)
   ret void
 }
 
@@ -176,5 +194,41 @@ define void @test_sendmsg_unknown_id(i32 %val) {
   ret void
 }
 
+; Test MSG_HS_TESSFACTOR (2) on GFX11+ - doesn't use m0
+define void @test_sendmsg_hs_tessfactor(i32 %val) {
+; GFX9-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; GFX9-NEXT:    ret void
+;
+; GFX10-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX10-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; GFX10-NEXT:    ret void
+;
+; GFX11-LABEL: @test_sendmsg_hs_tessfactor(
+; GFX11-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 2, i32 poison)
+; GFX11-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 2, i32 %val)
+  ret void
+}
+
+; Test MSG_DEALLOC_VGPRS (3) on GFX11+ - doesn't use m0
+define void @test_sendmsg_dealloc_vgprs(i32 %val) {
+; GFX9-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX9-NEXT:    ret void
+;
+; GFX10-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX10-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; GFX10-NEXT:    ret void
+;
+; GFX11-LABEL: @test_sendmsg_dealloc_vgprs(
+; GFX11-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 poison)
+; GFX11-NEXT:    ret void
+;
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %val)
+  ret void
+}
+
 declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
 declare void @llvm.amdgcn.s.sendmsghalt(i32 immarg, i32)

>From bbf21a4ea15e58e5ac032b8232efcb0caa7d025b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:44:56 +0530
Subject: [PATCH 16/18] Update sendmsg-m0-poison.ll


>From aeef3063adbfa12e174d439f805549985a450103 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:48:55 +0530
Subject: [PATCH 17/18] Update sendmsg-m0-poison.ll

---
 .../InstCombine/AMDGPU/sendmsg-m0-poison.ll   | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
index e54d1ab89d99f..c1f99d146232d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/sendmsg-m0-poison.ll
@@ -1,9 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
 ; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX9
 ; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX10
 ; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GFX11
 
 ; Test that the m0 operand is folded to poison for message types that don't use it.
+; For the default target (no mcpu), conflicting encodings (IDs 2, 3) should NOT be folded.
 
 ; MSG_INTERRUPT (1) DOES use m0 - should NOT be folded
 define void @test_sendmsg_interrupt(i32 %val) {
@@ -17,7 +19,12 @@ define void @test_sendmsg_interrupt(i32 %val) {
 
 ; MSG_GS (2) + GS_OP_EMIT (2 << 4) DOES use m0 (pre-GFX11) - should NOT be folded
 ; On GFX11+ this is MSG_HS_TESSFACTOR which doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding)
 define void @test_sendmsg_gs_emit(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_gs_emit(
+; DEFAULT-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
+; DEFAULT-NEXT:    ret void
+;
 ; GFX9-LABEL: @test_sendmsg_gs_emit(
 ; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 34, i32 [[VAL:%.*]])
 ; GFX9-NEXT:    ret void
@@ -36,7 +43,12 @@ define void @test_sendmsg_gs_emit(i32 %val) {
 
 ; MSG_GS_DONE (3) DOES use m0 (pre-GFX11) - should NOT be folded
 ; On GFX11+ this is ID_DEALLOC_VGPRS which doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding)
 define void @test_sendmsg_gs_done(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_gs_done(
+; DEFAULT-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; DEFAULT-NEXT:    ret void
+;
 ; GFX9-LABEL: @test_sendmsg_gs_done(
 ; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
 ; GFX9-NEXT:    ret void
@@ -195,7 +207,12 @@ define void @test_sendmsg_unknown_id(i32 %val) {
 }
 
 ; Test MSG_HS_TESSFACTOR (2) on GFX11+ - doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding with MSG_GS)
 define void @test_sendmsg_hs_tessfactor(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_hs_tessfactor(
+; DEFAULT-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
+; DEFAULT-NEXT:    ret void
+;
 ; GFX9-LABEL: @test_sendmsg_hs_tessfactor(
 ; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 2, i32 [[VAL:%.*]])
 ; GFX9-NEXT:    ret void
@@ -213,7 +230,12 @@ define void @test_sendmsg_hs_tessfactor(i32 %val) {
 }
 
 ; Test MSG_DEALLOC_VGPRS (3) on GFX11+ - doesn't use m0
+; On default target, this should NOT be folded (conflicting encoding with MSG_GS_DONE)
 define void @test_sendmsg_dealloc_vgprs(i32 %val) {
+; DEFAULT-LABEL: @test_sendmsg_dealloc_vgprs(
+; DEFAULT-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
+; DEFAULT-NEXT:    ret void
+;
 ; GFX9-LABEL: @test_sendmsg_dealloc_vgprs(
 ; GFX9-NEXT:    call void @llvm.amdgcn.s.sendmsg(i32 3, i32 [[VAL:%.*]])
 ; GFX9-NEXT:    ret void

>From f91c02a80fb52ac7d6dc78ac92ebbdc918574597 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Tue, 3 Mar 2026 14:56:49 +0530
Subject: [PATCH 18/18] Update AMDGPUInstCombineIntrinsic.cpp

---
 .../lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index a6ca213313058..3278d33c7ef12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1460,18 +1460,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       break;
 
     auto *MsgImm = cast<ConstantInt>(II.getArgOperand(0));
-    uint64_t Msg = MsgImm->getZExtValue();
-    // Extract the message ID. Pre-GFX11 uses the lower 4 bits, GFX11+ uses
-    // the lower 8 bits. Use 4-bit mask for extracting base message ID since
-    // all message types we handle fit in 4 bits, and the upper bits encode
-    // stream ID or other parameters for some message types (e.g., MSG_GS).
-    uint64_t MsgId = Msg & ID_MASK_PreGFX11_;
+    uint16_t MsgId, OpId, StreamId;
+    decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
 
     if (!msgDoesNotUseM0(MsgId, *ST))
       break;
 
-    // Drop noundef attribute since we're replacing with poison.
-    II.removeParamAttr(1, Attribute::NoUndef);
+    // Drop UB-implying attributes since we're replacing with poison.
+    II.dropUBImplyingAttrsAndMetadata();
     IC.replaceOperand(II, 1, PoisonValue::get(M0Val->getType()));
     return nullptr;
   }