[llvm] [AMDGPU] Combine VGPRSrc and VGPROp definitions into VGPROp (PR #157516)
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 11:04:50 PDT 2025
https://github.com/Sisyph updated https://github.com/llvm/llvm-project/pull/157516
>From 5ddefd893b8d523d9b5a5039675224bc54e6f171 Mon Sep 17 00:00:00 2001
From: Joseph Nash <joseph.nash at amd.com>
Date: Mon, 8 Sep 2025 10:55:33 -0400
Subject: [PATCH 1/2] [AMDGPU] Combine VGPRSrc and VGPROp definitions into
VGPROp
VGPRSrc and VGPROp can be represented by the same definition: each is just
a RegisterOperand wrapper for a VGPR register class with a DecoderMethod
override.
NFC.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 6 +--
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 48 ++++++++-------------
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 24 +++++------
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 18 ++++----
llvm/lib/Target/AMDGPU/VOPDInstructions.td | 2 +-
6 files changed, 45 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 562023cde44a4..7ca83eb4df035 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1898,7 +1898,7 @@ class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
!eq(VT.Size, 16) : !if(IsTrue16,
- !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
+ !if(IsFake16, VGPROp_32_Lo128, VGPROp_16_Lo128),
RegisterOperand<VGPR_32>),
1 : RegisterOperand<VGPR_32>);
}
@@ -2681,7 +2681,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
- field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src0VOP3DPP = VGPROp_32;
field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
@@ -2897,7 +2897,7 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
- let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32);
+ let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPROp_16, VGPROp_32);
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 162ef647be7e9..5f5eec49bab06 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1307,51 +1307,41 @@ def VRegSrc_fake16: SrcReg9<VGPR_32> {
let EncoderMethod = "getMachineOpValueT16";
}
//===----------------------------------------------------------------------===//
-// VGPRSrc_*
+// VGPROp_* An 8-bit RegisterOperand wrapper for a VGPR
//===----------------------------------------------------------------------===//
-// An 8-bit RegisterOperand wrapper for a VGPR
-def VGPRSrc_32 : RegisterOperand<VGPR_32> {
- let DecoderMethod = "DecodeVGPR_32RegisterClass";
+class VGPROp<RegisterClass regClass> : RegisterOperand<regClass> {
+ let DecoderMethod = "Decode" # regClass # "RegisterClass";
}
-def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
- let DecoderMethod = "DecodeVGPR_32RegisterClass";
+class VGPROp_Align2<RegisterClass regClass> : RegisterOperand<!cast<RegisterClass>(regClass#_Align2)> {
+ let DecoderMethod = "Decode" # regClass # "RegisterClass";
}
-def VGPRSrc_64 : RegisterOperand<VReg_64> {
- let DecoderMethod = "DecodeVReg_64RegisterClass";
+multiclass VGPROp_Aligned<RegisterClass regClass> {
+ def _Align1 : VGPROp<regClass>;
+ def _Align2 : VGPROp_Align2<regClass>;
}
-def VGPRSrc_96 : RegisterOperand<VReg_96> {
- let DecoderMethod = "DecodeVReg_96RegisterClass";
+// TODO: These cases should use default target alignment
+def VGPROp_16 : VGPROp<VGPR_16> {
+ let EncoderMethod = "getMachineOpValueT16";
}
+def VGPROp_32 : VGPROp<VGPR_32>;
-def VGPRSrc_128 : RegisterOperand<VReg_128> {
- let DecoderMethod = "DecodeVReg_128RegisterClass";
+foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "512", "1024"] in {
+ def VGPROp_#size : VGPROp<!cast<RegisterClass>("VReg_"#size)>;
}
-def VGPRSrc_192 : RegisterOperand<VReg_192> {
- let DecoderMethod = "DecodeVReg_192RegisterClass";
+foreach size = ["64", "96", "128", "160", "256", "1024"] in {
+ defm VGPROp_#size : VGPROp_Aligned<!cast<RegisterClass>("VReg_"#size)>;
}
-def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
+def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
let EncoderMethod = "getMachineOpValueT16Lo128";
}
-// True 16 operands.
-def VGPRSrc_16 : RegisterOperand<VGPR_16> {
- let DecoderMethod = "DecodeVGPR_16RegisterClass";
- let EncoderMethod = "getMachineOpValueT16";
-}
-
-// TODO: These cases should use default target alignment
-def VGPROp_16 : RegisterOperand<VGPR_16>;
-def VGPROp_32 : RegisterOperand<VGPR_32>;
-
-foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
- def VGPROp_#size : RegisterOperand<!cast<RegisterClass>("VReg_"#size)>;
- def VGPROp_#size#_Align1 : RegisterOperand<!cast<RegisterClass>("VReg_"#size)>;
- def VGPROp_#size#_Align2 : RegisterOperand<!cast<RegisterClass>("VReg_"#size#_Align2)>;
+def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
+ let DecoderMethod = "DecodeVGPR_32RegisterClass";
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 4f6b7c5923397..cff66aaedb11e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -416,12 +416,12 @@ def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
- let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm);
+ let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPROp_16_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
- let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm);
+ let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPROp_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;
def VOP_MADAK_F64 : VOP_MADAK <f64>;
@@ -452,12 +452,12 @@ def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
- let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1);
+ let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_16_Lo128:$src1);
}
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
- let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1);
+ let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
def VOP_MADMK_F64 : VOP_MADMK <f64>;
@@ -496,14 +496,14 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
HasClamp, HasModifiers, HasModifiers, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
// We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
- let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
- let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
+ let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPROp_32:$src2X);
+ let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPROp_32:$src2Y);
let InsVOPD3X = (ins Src0ModVOPD3:$src0X_modifiers, Src0VOPD3:$src0X,
Src1ModVOPD3:$vsrc1X_modifiers, Src1RC32:$vsrc1X,
- VGPRSrc_32:$src2X);
+ VGPROp_32:$src2X);
let InsVOPD3Y = (ins Src0ModVOPD3:$src0Y_modifiers, Src0VOPD3:$src0Y,
Src1ModVOPD3:$vsrc1Y_modifiers, Src1RC32:$vsrc1Y,
- VGPRSrc_32:$src2Y);
+ VGPROp_32:$src2Y);
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
@@ -565,7 +565,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue*/, 1/*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
@@ -597,7 +597,7 @@ def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
dpp8:$dpp8, Dpp8FI:$fi);
let DstRC64 = getVALUDstForVT<DstVT>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
@@ -796,7 +796,7 @@ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
Src2RC64, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 0/*IsFake16*/>.ret;
@@ -808,7 +808,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 1/*IsFake16*/>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index cf0e502b2dab4..71946b88aff82 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -44,7 +44,7 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
FP16InputMods:$src1_modifiers, Src1RC:$src1,
FP16InputMods:$src2_modifiers, Src2RC:$src2);
dag dpp_srcs =
- (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
+ (ins FPVRegInputMods:$src0_modifiers, VGPROp_32:$src0,
FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
FP16InputMods:$src2_modifiers, Src2RC:$src2);
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 450c88b6bf60c..2730ec52294e9 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -100,7 +100,7 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
@@ -126,7 +126,7 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
@@ -173,7 +173,7 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
@@ -197,7 +197,7 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
@@ -892,7 +892,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
// DPP8 forbids modifiers and can inherit from VOPC_Profile
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
+ dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPROp_32:$src0, VCSrc_b32:$src1);
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
@@ -915,7 +915,7 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
@@ -941,7 +941,7 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
@@ -985,7 +985,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
+ let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
@@ -1009,7 +1009,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_32;
+ let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index 4d97b9c81ea86..f416c06540480 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -224,7 +224,7 @@ foreach Gen = [GFX11GenD, GFX12GenD, GFX1250GenD] in {
defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2) # Gen.Suffix;
- defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
+ defvar outs = (outs VGPROp_32:$vdstX, VOPDDstYOperand:$vdstY);
if !or(isOpXMADK, isOpYMADK) then {
// If Both X and Y are MADK, the mandatory literal of X additionally must
// use an alternate operand format which defers to the 'real' Y literal.
>From 744c2843448e644ece19d49b227563bef0a7f6ea Mon Sep 17 00:00:00 2001
From: Joseph Nash <joseph.nash at amd.com>
Date: Mon, 8 Sep 2025 14:01:51 -0400
Subject: [PATCH 2/2] Add getVGPRSrcForVT
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 7ca83eb4df035..2ea59585ee1ca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1950,6 +1950,20 @@ class getVOP3VRegSrcForVT<ValueType VT> {
1 : VRegSrc_32);
}
+// VGPR-only VOP3 src with 8-bit encoding, e.g. VOP3DPP src0.
+class getVGPRSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
+ RegisterOperand ret =
+ !cond(!eq(VT.Size, 128) : VGPROp_128,
+ !eq(VT.Size, 96) : VGPROp_96,
+ !eq(VT.Size, 64) : VGPROp_64,
+ !eq(VT.Size, 48) : VGPROp_64,
+ !eq(VT.Size, 16) : !if(IsTrue16,
+ !if(IsFake16, VGPROp_32,
+ VGPROp_16),
+ VGPROp_32),
+ 1 : VGPROp_32);
+}
+
// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
RegisterOperand ret =
@@ -2681,7 +2695,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
- field RegisterOperand Src0VOP3DPP = VGPROp_32;
+ field RegisterOperand Src0VOP3DPP = getVGPRSrcForVT<Src0VT>.ret;
field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
More information about the llvm-commits
mailing list