[llvm] [AMDGPU] Use different values for SISrcMods::NEG and SISrcMods::SEXT (PR #147964)
Frederik Harwath via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 07:22:09 PDT 2025
https://github.com/frederik-h updated https://github.com/llvm/llvm-project/pull/147964
From b6e6dda4b56fca50f5533676d9856ce612568d38 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Wed, 4 Jun 2025 11:54:46 -0400
Subject: [PATCH] [AMDGPU] Use different values for SISrcMods::NEG and ::SEXT
The SISrcMods::NEG and SISrcMods::SEXT enumerators share the same enum
value. When they were introduced, it was assumed that the floating-point
"neg"/"abs" source modifiers and the integer "sext" modifier are mutually
exclusive. This can lead to miscompilation: a "sext" modifier may
currently be accepted erroneously on some instructions that are encoded
as floating-point instructions (see the test case modified by this PR),
in which case the encoding silently uses the "neg" modifier instead.
Furthermore, the "neg"/"abs" and "sext" modifiers are not necessarily
mutually exclusive, i.e. the hardware may support both, and that cannot
be represented correctly with the current enum.
This patch changes the SISrcMods enum to use distinct values for NEG and
SEXT. This is meant as a first step towards allowing both kinds of
modifiers to coexist on the same instruction.
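To illustrate the problem, here is a minimal, self-contained C++ sketch
(not part of the patch; the Old/New namespaces and the main() driver are
made up for illustration) that mirrors the old and new mask layouts:

#include <cstdio>

namespace OldSISrcMods {
enum : unsigned {
  NEG = 1 << 0,  // Floating-point negate modifier
  SEXT = 1 << 0, // Integer sign-extend modifier -- same bit as NEG
};
}

namespace NewSISrcMods {
enum : unsigned {
  NEG = 1 << 0,  // Floating-point negate modifier
  SEXT = 1 << 4, // Integer sign-extend modifier -- now a separate bit
};
}

int main() {
  // With the old layout a "sext" modifier is indistinguishable from
  // "neg", so an FP-encoded instruction that was handed SEXT ended up
  // encoding the negate bit instead.
  unsigned OldMods = OldSISrcMods::SEXT;
  unsigned NewMods = NewSISrcMods::SEXT;
  std::printf("old: looks like neg? %d\n",
              (OldMods & OldSISrcMods::NEG) != 0); // prints 1
  std::printf("new: looks like neg? %d\n",
              (NewMods & NewSISrcMods::NEG) != 0); // prints 0
  return 0;
}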
---
llvm/lib/Target/AMDGPU/SIDefines.h | 20 ++++++------
llvm/lib/Target/AMDGPU/VOPInstructions.td | 16 +++++-----
.../sdwa-peephole-instr-combine-sel-src.mir | 32 +++++++++----------
llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s | 3 ++
.../AMDGPU/gfx9_vop2_features.txt | 3 +-
5 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index baf74dbdde20e..c6b0c5b2ce9cb 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -262,16 +262,16 @@ enum OperandType : unsigned {
// Input operand modifiers bit-masks
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
- enum : unsigned {
- NONE = 0,
- NEG = 1 << 0, // Floating-point negate modifier
- ABS = 1 << 1, // Floating-point absolute modifier
- SEXT = 1 << 0, // Integer sign-extend modifier
- NEG_HI = ABS, // Floating-point negate high packed component modifier.
- OP_SEL_0 = 1 << 2,
- OP_SEL_1 = 1 << 3,
- DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
- };
+enum : unsigned {
+ NONE = 0,
+ NEG = 1 << 0, // Floating-point negate modifier
+ ABS = 1 << 1, // Floating-point absolute modifier
+ SEXT = 1 << 4, // Integer sign-extend modifier
+ NEG_HI = ABS, // Floating-point negate high packed component modifier.
+ OP_SEL_0 = 1 << 2,
+ OP_SEL_1 = 1 << 3,
+ DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1)
+};
}
namespace SIOutMods {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 1e47acb5fde4f..f0adddbee9310 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -626,9 +626,9 @@ def SDWA {
class VOP_SDWAe<VOPProfile P> : Enc64 {
bits<8> src0;
bits<3> src0_sel;
- bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
+ bits<5> src0_modifiers; // float: {abs,neg}, int {sext}
bits<3> src1_sel;
- bits<2> src1_modifiers;
+ bits<5> src1_modifiers;
bits<3> dst_sel;
bits<2> dst_unused;
bits<1> clamp;
@@ -638,10 +638,10 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
let Inst{44-43} = !if(P.EmitDstSel, dst_unused{1-0}, ?);
let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
- let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
+ let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0);
let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
- let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
+ let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0);
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
}
@@ -662,18 +662,18 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
class VOP_SDWA9e<VOPProfile P> : Enc64 {
bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
bits<3> src0_sel;
- bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
+ bits<5> src0_modifiers; // float: {abs,neg}, int {sext}
bits<3> src1_sel;
- bits<2> src1_modifiers;
+ bits<5> src1_modifiers;
bits<1> src1_sgpr;
let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0);
- let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
+ let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0);
let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
let Inst{55} = !if(P.HasSrc0, src0{8}, 0);
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0);
- let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
+ let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0);
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
let Inst{63} = 0; // src1_sgpr - should be specified in subclass
}
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir
index 14ba8fccb172d..1c20db9577695 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir
@@ -484,7 +484,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -572,7 +572,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -638,7 +638,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -704,7 +704,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -792,7 +792,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -814,7 +814,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -836,7 +836,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -902,7 +902,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -924,7 +924,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -946,7 +946,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -968,7 +968,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -990,7 +990,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1012,7 +1012,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1034,7 +1034,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1056,7 +1056,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
@@ -1078,7 +1078,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec
- ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
%1:vgpr_32 = COPY $vgpr0
%2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s
index 4b5efd00a7adf..85978b04779d0 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s
@@ -88,4 +88,7 @@ v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05]
v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
+
+v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt
index 2b8d58853847b..55fdc2b15bf05 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt
@@ -87,6 +87,7 @@
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05
-# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+# FIXME: The instruction gets printed using the wrong function (AMDGPUInstPrinter::printOperandAndIntInputMods) and hence the "-" modifier is not printed.
+# COM: v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16