[llvm] [AMDGPU] si-peephole-sdwa: Disable V_CNDMASK_B32 conversion with sext (PR #140760)

Frederik Harwath via llvm-commits llvm-commits at lists.llvm.org
Tue May 20 09:48:06 PDT 2025


https://github.com/frederik-h created https://github.com/llvm/llvm-project/pull/140760

The sext modifier on an operand of V_CNDMASK_B32_sdwa gets erroneously turned into a neg modifier in the assembly output.

As a workaround, to avoid miscompilation, this patch disables the conversion of V_CNDMASK_B32 to the SDWA form if any operand uses an sext modifier.

Fixes #138766.

From a035402e9ff56cb3b13562d3e7775087e273b32c Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Tue, 20 May 2025 11:52:13 -0400
Subject: [PATCH] [AMDGPU] si-peephole-sdwa: Disable V_CNDMASK conversion with
 sext

The sext modifier on an operand of V_CNDMASK_B32_sdwa gets erroneously
turned into a neg modifier in the assembly output.

As a workaround, to avoid miscompilation, this patch disables the
conversion of V_CNDMASK_B32 to the SDWA form if any operand uses
an sext modifier.
---
 llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp     |  7 +++
 .../AMDGPU/sdwa-peephole-cndmask-sext.ll      | 47 +++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll

diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index bd8baaaa3df20..70d448e75eb1a 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -430,6 +430,13 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
     // Does not support input modifiers: noabs, noneg, nosext.
     return false;
+  case AMDGPU::V_CNDMASK_B32_sdwa:
+    // FIXME SISrcMods uses the same bitmask for SEXT and NEG
+    // modifiers and hence each instruction can only support one type
+    // of modifier; SEXT gets turned into NEG for this instruction.
+    if (Sext)
+      return false;
+    break;
   }
 
   // Find operand in instruction that matches source operand and replace it with
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
new file mode 100644
index 0000000000000..4a6c4ae5e6c02
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
+; XFAIL: *
+
+; FIXME The sext modifier is turned into a neg modifier in the asm output
+
+define void @widget(ptr addrspace(7) %arg, <1 x i1> %arg1, <1 x i1> %arg2) #0 {
+; CHECK-LABEL: widget:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_mov_b32 s2, s0
+; CHECK-NEXT:    s_mov_b32 s3, s0
+; CHECK-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; CHECK-NEXT:    v_and_b32_e32 v1, 1, v5
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; CHECK-NEXT:  ; %bb.1: ; %cond.load
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    ds_read_b32 v1, v1
+; CHECK-NEXT:  ; %bb.2: ; %else
+; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
+; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; CHECK-NEXT:    s_cbranch_execz .LBB0_4
+; CHECK-NEXT:  ; %bb.3: ; %cond.store
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_cndmask_b32_sdwa v0, v2, sext(v0), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_or_b32_e32 v0, v0, v1
+; CHECK-NEXT:    ds_write_b32 v2, v0
+; CHECK-NEXT:  .LBB0_4: ; %else1
+; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %load = load <1 x i8>, ptr addrspace(7) null, align 1
+  %sext = sext <1 x i8> %load to <1 x i32>
+  %select = select <1 x i1> %arg1, <1 x i32> %sext, <1 x i32> zeroinitializer
+  %call = tail call <1 x i32> @llvm.masked.load.v1i32.p3(ptr addrspace(3) null, i32 1, <1 x i1> %arg1, <1 x i32> zeroinitializer)
+  %or = or <1 x i32> %select, %call
+  tail call void @llvm.masked.store.v1i32.p3(<1 x i32> %or, ptr addrspace(3) null, i32 1, <1 x i1> %arg1)
+  tail call void @llvm.amdgcn.s.waitcnt(i32 0)
+  ret void
+}



More information about the llvm-commits mailing list