[llvm] d45031c - [AMDGPU] si-peephole-sdwa: Disable V_CNDMASK_B32 conversion with sext (#140760)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 00:33:12 PDT 2025
Author: Frederik Harwath
Date: 2025-05-26T09:33:09+02:00
New Revision: d45031ce5281b9fae54f2fdf5edff831e1308976
URL: https://github.com/llvm/llvm-project/commit/d45031ce5281b9fae54f2fdf5edff831e1308976
DIFF: https://github.com/llvm/llvm-project/commit/d45031ce5281b9fae54f2fdf5edff831e1308976.diff
LOG: [AMDGPU] si-peephole-sdwa: Disable V_CNDMASK_B32 conversion with sext (#140760)
The sext modifier on an operand of V_CNDMASK_B32_sdwa gets erroneously
turned into a neg modifier in the assembly output.
As a workaround, to avoid miscompilation, this patch disables the
conversion of V_CNDMASK_B32 to the SDWA form if any operand uses an sext
modifier.
Fixes #138766.
---------
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
Added:
llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
Modified:
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index bd8baaaa3df20..1e305c2efc8a0 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -430,6 +430,21 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
// Does not support input modifiers: noabs, noneg, nosext.
return false;
+ case AMDGPU::V_CNDMASK_B32_sdwa:
+ // SISrcMods uses the same bitmask for SEXT and NEG modifiers and
+ // hence the compiler can only support one type of modifier for
+ // each SDWA instruction. For V_CNDMASK_B32_sdwa, this is NEG
+ // since its operands get printed using
+ // AMDGPUInstPrinter::printOperandAndFPInputMods which produces
+ // the output intended for NEG if SEXT is set.
+ //
+ // The ISA does actually support both modifiers on most SDWA
+ // instructions.
+ //
+ // FIXME Accept SEXT here after fixing this issue.
+ if (Sext)
+ return false;
+ break;
}
// Find operand in instruction that matches source operand and replace it with
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
new file mode 100644
index 0000000000000..2c7819a395c86
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
+
+; FIXME: SDWA conversion of v_cndmask_b32 is disabled here because the sext modifier would be printed as a neg modifier in the asm output.
+
+define i32 @test_select_on_sext_sdwa(i8 %x, i32 %y, i1 %cond) {
+; CHECK-LABEL: test_select_on_sext_sdwa:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 8
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %sext = sext i8 %x to i32
+ %select = select i1 %cond, i32 %sext, i32 0
+ %or = or i32 %select, %y
+ ret i32 %or
+}
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir
index e243df4077ff4..52c06952aa9fd 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir
@@ -231,3 +231,41 @@ body: |
$vgpr0 = COPY %3
SI_RETURN implicit $vgpr0
...
+
+# SDWA conversion of V_CNDMASK_B32 with V_BFE_I32 operand had to be
+# disabled.
+# FIXME sext modifier gets erroneously printed as neg modifier.
+
+...
+---
+name: issue138766_cndmask_with_sext
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: issue138766_cndmask_with_sext
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
+ ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY2]], 0, 8, implicit $exec
+ ; CHECK-NEXT: $vcc = COPY killed [[V_CMP_EQ_U32_e64_]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 0, killed [[V_BFE_I32_e64_]], implicit $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[V_CNDMASK_B32_e32_]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %10:vgpr_32 = COPY $vgpr2
+ %9:vgpr_32 = COPY $vgpr1
+ %8:vgpr_32 = COPY $vgpr0
+ %11:vgpr_32 = V_AND_B32_e64 1, %10, implicit $exec
+ %12:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %11, 1, implicit $exec
+ %14:vgpr_32 = V_BFE_I32_e64 %8, 0, 8, implicit $exec
+ %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %14, killed %12, implicit $exec
+ %18:vgpr_32 = V_OR_B32_e64 killed %16, %9, implicit $exec
+ $vgpr0 = COPY %18
+ SI_RETURN implicit $vgpr0
+...
More information about the llvm-commits
mailing list