[llvm] cfe9a13 - [AMDGPU] Rename 64BitDPP feature and fix the checks
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 22 11:00:17 PDT 2023
Author: Stanislav Mekhanoshin
Date: 2023-08-22T11:00:10-07:00
New Revision: cfe9a134bb1dda837786bbcca1b8f75f5c797175
URL: https://github.com/llvm/llvm-project/commit/cfe9a134bb1dda837786bbcca1b8f75f5c797175
DIFF: https://github.com/llvm/llvm-project/commit/cfe9a134bb1dda837786bbcca1b8f75f5c797175.diff
LOG: [AMDGPU] Rename 64BitDPP feature and fix the checks
The names '64BitDPP' and especially 'DPP64' were found to be misleading:
DPP64 can easily be confused with DPP16 and DPP8, although these are
different concepts. DPP16 and DPP8 refer to lanes, whereas DPP64
refers to the operand size.
In fact, the essential part here is that these instructions are
executed on the DP ALU, so rename the feature accordingly.
I have also found a bug in a check for these instructions. It is
fixed here, and a common utility function (AMDGPU::isDPALU_DPP) is
now used for the check.
Differential Revision: https://reviews.llvm.org/D158465
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/MC/AMDGPU/gfx90a_err.s
llvm/test/MC/AMDGPU/gfx940_asm_features.s
llvm/test/MC/AMDGPU/gfx940_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index dd30fcac2ae585..856def93b6047f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -454,10 +454,10 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
"Support DPP8 (Data Parallel Primitives) extension"
>;
-def Feature64BitDPP : SubtargetFeature<"dpp-64bit",
- "Has64BitDPP",
+def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit",
+ "HasDPALU_DPP",
"true",
- "Support DPP (Data Parallel Primitives) extension"
+ "Support DPP (Data Parallel Primitives) extension in DP ALU"
>;
def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops",
@@ -1179,7 +1179,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
[FeatureGFX90AInsts,
FeatureFmacF64Inst,
- Feature64BitDPP,
+ FeatureDPALU_DPP,
FeaturePackedFP32Ops,
FeatureAtomicFaddRtnInsts,
FeatureAtomicBufferGlobalPkAddF16Insts,
@@ -1213,7 +1213,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureDot10Insts,
FeatureAtomicDsPkAdd16Insts,
FeatureAtomicFlatPkAdd16Insts,
- Feature64BitDPP,
+ FeatureDPALU_DPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
@@ -1699,8 +1699,8 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">,
def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>;
-def Has64BitDPP : Predicate<"Subtarget->has64BitDPP()">,
- AssemblerPredicate<(all_of Feature64BitDPP)>;
+def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">,
+ AssemblerPredicate<(all_of FeatureDPALU_DPP)>;
def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 8ee43a0158e1b5..15b6fc2ae9cdf2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1990,7 +1990,7 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
// GFX90A allows DPP on 64-bit operands.
(isRegClass(AMDGPU::VReg_64RegClassID) &&
- AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+ AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
bool AMDGPUOperand::isT16VRegWithInputMods() const {
@@ -4196,15 +4196,12 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
return true;
unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
- if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
- // DPP64 is supported for row_newbcast only.
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
- SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
- Error(S, "64 bit dpp only supports row_newbcast");
- return false;
- }
+ if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
+ AMDGPU::isDPALU_DPP(MII.get(Opc))) {
+ // DP ALU DPP is supported for row_newbcast only on GFX9*
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
+ Error(S, "DP ALU dpp only supports row_newbcast");
+ return false;
}
return true;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 2592584b89c6ba..e1cd0f0a732b93 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -505,7 +505,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
- if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
+ if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
" control value\n");
// Let it split, then control may become legal.
@@ -728,7 +728,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
++NumDPPMovsCombined;
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
- if (ST->has64BitDPP() && combineDPPMov(MI)) {
+ if (ST->hasDPALU_DPP() && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
} else {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 55966ab5a6a160..425b40f4bd9a23 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -125,7 +125,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
- bool Has64BitDPP = false;
+ bool HasDPALU_DPP = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
@@ -908,8 +908,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasDPP8;
}
- bool has64BitDPP() const {
- return Has64BitDPP;
+ bool hasDPALU_DPP() const {
+ return HasDPALU_DPP;
}
bool hasPackedFP32Ops() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 8a58bb2231b40a..bdfffc475c90ae 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -846,13 +846,9 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
- !AMDGPU::isLegal64BitDPPControl(Imm)) {
- O << " /* 64 bit dpp only supports row_newbcast */";
+ if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
+ O << " /* DP ALU dpp only supports row_newbcast */";
return;
} else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
O << "quad_perm:[";
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2084e722a8888b..9bebb077c4f8d7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2369,7 +2369,7 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
if (ST.hasMovB64() &&
- AMDGPU::isLegal64BitDPPControl(
+ AMDGPU::isLegalDPALU_DPPControl(
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
return std::pair(&MI, nullptr);
@@ -4809,20 +4809,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
-
if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
- ((DstIdx >= 0 &&
- (Desc.operands()[DstIdx].RegClass == AMDGPU::VReg_64RegClassID ||
- Desc.operands()[DstIdx].RegClass ==
- AMDGPU::VReg_64_Align2RegClassID)) ||
- ((Src0Idx >= 0 &&
- (Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID ||
- Desc.operands()[Src0Idx].RegClass ==
- AMDGPU::VReg_64_Align2RegClassID)))) &&
- !AMDGPU::isLegal64BitDPPControl(DC)) {
+ !AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) {
ErrInfo = "Invalid dpp_ctrl value: "
- "64 bit dpp only support row_newbcast";
+ "DP ALU dpp only support row_newbcast";
return false;
}
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 632af5c94aabac..63da86391e5c65 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2740,6 +2740,25 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
: getGfx9BufferFormatInfo(Format);
}
+bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
+ for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
+ OpName::src2 }) {
+ int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
+ if (Idx == -1)
+ continue;
+
+ if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
+ OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
+ return true;
+ }
+
+ return false;
+}
+
+bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
+ return hasAny64BitVGPROperands(OpDesc);
+}
+
} // namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c123fa535d8bb2..2273ba935f5d63 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1331,10 +1331,16 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
LLVM_READNONE
-inline bool isLegal64BitDPPControl(unsigned DC) {
+inline bool isLegalDPALU_DPPControl(unsigned DC) {
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}
+/// \returns true if an instruction may have a 64-bit VGPR operand.
+bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
+
+/// \returns true if an instruction is a DP ALU DPP.
+bool isDPALU_DPP(const MCInstrDesc &OpDesc);
+
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 3755daf4f9b18e..90ba6e298429f4 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -791,8 +791,8 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
string AsmOperands = asmOps;
let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
- let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
- let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
+ let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
+ let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
@@ -862,8 +862,8 @@ class VOP_DPP_Base <string OpName, VOPProfile P,
let Size = 8;
let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
- let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
- let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
+ let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
+ let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP);
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index ff293ce2d8c3d4..f494bd83913213 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -139,16 +139,16 @@ v_mov_b32_dpp v5, v1 row_share:1 row_mask:0x0 bank_mask:0x0
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
v_ceil_f64_dpp v[0:1], v[2:3] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast
v_ceil_f64_dpp v[0:1], v[2:3] row_shl:1 row_mask:0xf bank_mask:0xf
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast
v_ceil_f64_dpp v[0:1], v[2:3] wave_ror:1 row_mask:0xf bank_mask:0xf
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast
v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast
v_ceil_f64_dpp v[0:1], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index c88f31d0e66397..a8f7b06a1fe323 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -651,8 +651,8 @@ v_cvt_pk_f32_bf8 v[2:3], v3
v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1
// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
-v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff]
+v_cvt_pk_f32_bf8 v[0:1], v3 row_newbcast:3
// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
@@ -687,8 +687,8 @@ v_cvt_pk_f32_fp8 v[2:3], s3 src0_sel:WORD_1
v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1
// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
-v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
+// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff]
+v_cvt_pk_f32_fp8 v[0:1], v3 row_newbcast:3
// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s
index 9a3ba6fd0f6b99..ad52d8bd643a5a 100644
--- a/llvm/test/MC/AMDGPU/gfx940_err.s
+++ b/llvm/test/MC/AMDGPU/gfx940_err.s
@@ -28,7 +28,7 @@ v_mad_legacy_f32 v0, v1, v2, v3
// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mov_b64 v[2:3], v[4:5] row_shl:1
-// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast
+// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast
v_mov_b64 v[2:3], -v[4:5]
// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
index c0516ff0dd2ef3..01eef8b646f4c6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt
@@ -450,8 +450,8 @@
# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00]
0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00
-# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff]
-0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff
+# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff]
+0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff
# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08]
0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08
@@ -477,8 +477,8 @@
# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00]
0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00
-# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff]
-0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff
+# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff]
+0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff
# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08]
0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08
More information about the llvm-commits
mailing list