[llvm] 4cc2785 - [AMDGPU][True16][MC] VOPC profile fake16 pseudo update (#113175)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 22 09:12:16 PST 2024
Author: Brox Chen
Date: 2024-11-22T12:12:13-05:00
New Revision: 4cc278587f3f44df08c6bebc0b4887f8522143f1
URL: https://github.com/llvm/llvm-project/commit/4cc278587f3f44df08c6bebc0b4887f8522143f1
DIFF: https://github.com/llvm/llvm-project/commit/4cc278587f3f44df08c6bebc0b4887f8522143f1.diff
LOG: [AMDGPU][True16][MC] VOPC profile fake16 pseudo update (#113175)
Update the VOPC profile and VOP3 pseudos:
1. On GFX11+, v_cmp_class_f16 has an f16-typed src1 for literals, even
though the operand is semantically interpreted as an integer. Update the
VOPC class f16 profile's operand types from (f16, i16) to (f16, f16).
This patch updates the fake16 format; the t16 format will be updated in
a follow-up patch.
2. The 16-bit V_CMP instructions (V_CMP_**_U/I/F16) are named with
`t16` but actually use 32-bit registers. Correct this by updating the
pseudo definitions with the useRealTrue16/useFakeTrue16 predicates and
renaming these `t16` instructions to `fake16`.
3. Update the instruction selector so that `t16`/`fake16` instructions
are selected in the true16/fake16 flow, respectively (a minimal sketch
of the selection logic follows this list).
4. The MIR test files are affected by the renaming of these 16-bit
V_CMP instructions, but there is no functional change to the emitted
code.
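The following is a minimal, self-contained C++ sketch of the opcode
selection policy that the AMDGPUInstructionSelector change below
implements. The SubtargetInfo struct and the plain int opcode parameters
are illustrative stand-ins for the real GCNSubtarget queries and the
AMDGPU::V_CMP_* opcode enums; only the branching logic mirrors the patch.

#include <cassert>

// Illustrative stand-in for the subtarget queries used in the patch
// (has16BitInsts / hasTrue16BitInsts / useRealTrue16Insts).
struct SubtargetInfo {
  bool Has16BitInsts;      // target supports 16-bit VALU compares at all
  bool HasTrue16BitInsts;  // target belongs to the True16 encoding family
  bool UseRealTrue16Insts; // real 16-bit registers enabled (else fake16)
};

// After this patch every 16-bit V_CMP carries three 16-bit variants: the
// legacy S16 opcode (pre-True16 targets), a _t16 opcode using true 16-bit
// registers, and a _fake16 opcode keeping 16-bit semantics on 32-bit
// registers.
int selectVCmpOpcode(const SubtargetInfo &ST, unsigned Size, int S16Opc,
                     int TrueS16Opc, int FakeS16Opc, int S32Opc,
                     int S64Opc) {
  if (Size == 16 && !ST.Has16BitInsts)
    return -1; // no 16-bit compare available on this target
  if (Size == 16)
    return ST.HasTrue16BitInsts
               ? (ST.UseRealTrue16Insts ? TrueS16Opc : FakeS16Opc)
               : S16Opc;
  if (Size == 32)
    return S32Opc;
  assert(Size == 64 && "unexpected compare size");
  return S64Opc;
}

With this three-way split, a target that has True16 encodings but does
not enable real 16-bit registers resolves a 16-bit compare to the
_fake16 opcode, which is exactly the renaming that the updated MIR tests
below pick up.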
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/VOPCInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
llvm/test/CodeGen/AMDGPU/shrink-true16.mir
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3af6328af7acd6..4415d23f8b57f0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1114,10 +1114,13 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
if (Size == 16 && !ST.has16BitInsts())
return -1;
- const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
+ const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
+ unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
- return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
+ return ST.hasTrue16BitInsts()
+ ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ : S16Opc;
if (Size == 32)
return S32Opc;
return S64Opc;
@@ -1128,83 +1131,109 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
- AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
+ AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
+ AMDGPU::V_CMP_NE_U64_e64);
case CmpInst::ICMP_EQ:
return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
- AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
+ AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
+ AMDGPU::V_CMP_EQ_U64_e64);
case CmpInst::ICMP_SGT:
return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
- AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
+ AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
+ AMDGPU::V_CMP_GT_I64_e64);
case CmpInst::ICMP_SGE:
return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
- AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
+ AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
+ AMDGPU::V_CMP_GE_I64_e64);
case CmpInst::ICMP_SLT:
return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
- AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
+ AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
+ AMDGPU::V_CMP_LT_I64_e64);
case CmpInst::ICMP_SLE:
return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
- AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
+ AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
+ AMDGPU::V_CMP_LE_I64_e64);
case CmpInst::ICMP_UGT:
return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
- AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
+ AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
+ AMDGPU::V_CMP_GT_U64_e64);
case CmpInst::ICMP_UGE:
return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
- AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
+ AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
+ AMDGPU::V_CMP_GE_U64_e64);
case CmpInst::ICMP_ULT:
return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
- AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
+ AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
+ AMDGPU::V_CMP_LT_U64_e64);
case CmpInst::ICMP_ULE:
return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
- AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
+ AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
+ AMDGPU::V_CMP_LE_U64_e64);
case CmpInst::FCMP_OEQ:
return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
- AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
+ AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
+ AMDGPU::V_CMP_EQ_F64_e64);
case CmpInst::FCMP_OGT:
return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
- AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
+ AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
+ AMDGPU::V_CMP_GT_F64_e64);
case CmpInst::FCMP_OGE:
return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
- AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
+ AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
+ AMDGPU::V_CMP_GE_F64_e64);
case CmpInst::FCMP_OLT:
return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
- AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
+ AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
+ AMDGPU::V_CMP_LT_F64_e64);
case CmpInst::FCMP_OLE:
return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
- AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
+ AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
+ AMDGPU::V_CMP_LE_F64_e64);
case CmpInst::FCMP_ONE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_ORD:
return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
- AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
+ AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
+ AMDGPU::V_CMP_O_F64_e64);
case CmpInst::FCMP_UNO:
return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
- AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
+ AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
+ AMDGPU::V_CMP_U_F64_e64);
case CmpInst::FCMP_UEQ:
return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
- AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
+ AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
+ AMDGPU::V_CMP_NLG_F64_e64);
case CmpInst::FCMP_UGT:
return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
- AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
+ AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
+ AMDGPU::V_CMP_NLE_F64_e64);
case CmpInst::FCMP_UGE:
return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
- AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
+ AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
+ AMDGPU::V_CMP_NLT_F64_e64);
case CmpInst::FCMP_ULT:
return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
- AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
+ AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
+ AMDGPU::V_CMP_NGE_F64_e64);
case CmpInst::FCMP_ULE:
return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
- AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
+ AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
+ AMDGPU::V_CMP_NGT_F64_e64);
case CmpInst::FCMP_UNE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_TRUE:
return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
- AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
+ AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
+ AMDGPU::V_CMP_TRU_F64_e64);
case CmpInst::FCMP_FALSE:
return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
- AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
+ AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
+ AMDGPU::V_CMP_F_F64_e64);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c864f03f1f0f9e..b7c008235fb7ae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5501,20 +5501,48 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
- case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
- case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
- case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
- case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
- case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
- case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
- case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
- case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
- case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
- case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
- case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
- case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
- case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
- case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
+ case AMDGPU::S_CMP_LT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
+ : AMDGPU::V_CMP_LT_F16_fake16_e64;
+ case AMDGPU::S_CMP_EQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
+ : AMDGPU::V_CMP_EQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_LE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
+ : AMDGPU::V_CMP_LE_F16_fake16_e64;
+ case AMDGPU::S_CMP_GT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
+ : AMDGPU::V_CMP_GT_F16_fake16_e64;
+ case AMDGPU::S_CMP_LG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
+ : AMDGPU::V_CMP_LG_F16_fake16_e64;
+ case AMDGPU::S_CMP_GE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
+ : AMDGPU::V_CMP_GE_F16_fake16_e64;
+ case AMDGPU::S_CMP_O_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
+ : AMDGPU::V_CMP_O_F16_fake16_e64;
+ case AMDGPU::S_CMP_U_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
+ : AMDGPU::V_CMP_U_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
+ : AMDGPU::V_CMP_NGE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
+ : AMDGPU::V_CMP_NLG_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
+ : AMDGPU::V_CMP_NGT_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
+ : AMDGPU::V_CMP_NLE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NEQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
+ : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
+ : AMDGPU::V_CMP_NLT_F16_fake16_e64;
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
@@ -7324,7 +7352,29 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
case AMDGPU::S_CMP_NGT_F32:
case AMDGPU::S_CMP_NLE_F32:
case AMDGPU::S_CMP_NEQ_F32:
- case AMDGPU::S_CMP_NLT_F32:
+ case AMDGPU::S_CMP_NLT_F32: {
+ Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
+ auto NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
+ .setMIFlags(Inst.getFlags());
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
+ 0) {
+ NewInstr
+ .addImm(0) // src0_modifiers
+ .add(Inst.getOperand(0)) // src0
+ .addImm(0) // src1_modifiers
+ .add(Inst.getOperand(1)) // src1
+ .addImm(0); // clamp
+ } else {
+ NewInstr.add(Inst.getOperand(0)).add(Inst.getOperand(1));
+ }
+ legalizeOperands(*NewInstr, MDT);
+ int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
+ MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
+ Inst.eraseFromParent();
+ return;
+ }
case AMDGPU::S_CMP_LT_F16:
case AMDGPU::S_CMP_EQ_F16:
case AMDGPU::S_CMP_LE_F16:
@@ -7343,14 +7393,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
auto NewInstr =
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
.setMIFlags(Inst.getFlags());
- if (AMDGPU::getNamedOperandIdx(NewOpcode,
- AMDGPU::OpName::src0_modifiers) >= 0) {
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0_modifiers)) {
NewInstr
.addImm(0) // src0_modifiers
.add(Inst.getOperand(0)) // src0
.addImm(0) // src1_modifiers
.add(Inst.getOperand(1)) // src1
.addImm(0); // clamp
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
+ NewInstr.addImm(0); // op_sel0
} else {
NewInstr
.add(Inst.getOperand(0))
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index f4ccae1decb1df..0a4b51c4ac631d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -192,6 +192,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily, string asm_name = ps.Pseudo
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let True16Predicate = ps.True16Predicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -314,7 +316,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
+ def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret, /*IsVOP3P*/false, P.HasOpSel>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<1, opName#"_e64">,
VCMPVCMPXTable<opName#"_e64"> {
@@ -373,7 +375,7 @@ multiclass VOPCX_Pseudos <string opName,
let IsVCMPX = 1;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], /*IsVOP3P*/false, P_NoSDst.HasOpSel>,
Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<0, opName#"_e64">,
VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e64")> {
@@ -799,11 +801,22 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
// Class instructions
//===----------------------------------------------------------------------===//
-class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
+class VOPC_Class_Profile_Base<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
VOPC_Profile<sched, src0VT, src1VT> {
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+ Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
+
+ let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
+ let HasClamp = 0;
+ let HasOMod = 0;
+}
+
+class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
+ VOPC_Class_Profile_Base<sched, src0VT, src1VT> {
let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP16 = AsmDPP#"$fi";
- let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
// DPP8 forbids modifiers and can inherit from VOPC_Profile
@@ -812,15 +825,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
-
- let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
- Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
- Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
-
- let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
let HasSrc1Mods = 0;
- let HasClamp = 0;
- let HasOMod = 0;
}
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
@@ -837,16 +842,25 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}
- def _fake16 : VOPC_Class_Profile<sched, f16, i16> {
+ def _fake16 : VOPC_Class_Profile_Base<sched, f16, f16> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -889,17 +903,34 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
}
}
-class getVOPCClassPat64 <VOPProfile P> {
- list<dag> ret =
- [(set i1:$sdst,
+multiclass VOPCClassPat64<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
(AMDGPUfp_class
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
- i32:$src1))];
+ P.Src1VT:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0, P.Src1VT:$src1)
+ >;
}
+multiclass VOPCClassPat64_fake16<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
+ (AMDGPUfp_class
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
+ i32:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0,
+ 0 /*src1_modifiers*/, VGPR_32:$src1)
+ >;
+}
-// Special case for class instructions which only have modifiers on
-// the 1st source operand.
+// cmp_class ignores the FP mode and faithfully reports the unmodified
+// source value.
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
bit DefVcc = 1> {
def _e32 : VOPC_Pseudo <opName, p>,
@@ -910,7 +941,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let isConvergent = DefExec;
}
- def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
+ def _e64 : VOP3_Pseudo<opName, p, [], 0/*IsVOP3P*/, p.HasOpSel>,
VCMPXNoSDstTable<1, opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
@@ -957,7 +988,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
VCMPXNoSDstTable<0, opName#"_e64"> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -990,6 +1021,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
} // end SubtargetPredicate = isGFX11Plus
}
} // End SubtargetPredicate = HasSdstCMPX
+} // End ReadsModeReg = 0, mayRaiseFPException = 0
defm VOPC_I1_F16_I16 : VOPC_Class_Profile_t16<[Write32Bit]>;
def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
@@ -1002,12 +1034,14 @@ def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
multiclass VOPC_CLASS_F16 <string opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F16_I16, 0>;
+ defm : VOPCClassPat64<NAME>;
}
- let OtherPredicates = [UseRealTrue16Insts] in {
+ let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
}
- let OtherPredicates = [UseFakeTrue16Insts] in {
+ let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
+ defm : VOPCClassPat64_fake16<NAME>;
}
}
@@ -1023,21 +1057,22 @@ multiclass VOPCX_CLASS_F16 <string opName> {
}
}
-multiclass VOPC_CLASS_F32 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+multiclass VOPC_CLASS_F32 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
multiclass VOPCX_CLASS_F32 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
-multiclass VOPC_CLASS_F64 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+multiclass VOPC_CLASS_F64 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
multiclass VOPCX_CLASS_F64 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
-// cmp_class ignores the FP mode and faithfully reports the unmodified
-// source value.
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
@@ -1045,7 +1080,6 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
-} // End ReadsModeReg = 0, mayRaiseFPException = 0
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
@@ -1283,6 +1317,7 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1303,6 +1338,7 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let Constraints = "";
}
@@ -1333,6 +1369,7 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1375,6 +1412,7 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
}
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index aa9758219db914..eb9d00972468c2 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1958,12 +1958,18 @@ class ClassPat<Instruction inst, ValueType vt> : GCNPat <
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
>;
+class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
+ (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
+ (inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
+>;
+
def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
- let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];
+ let OtherPredicates = [Has16BitInsts];
+ let True16Predicate = NotHasTrue16BitInsts;
}
-def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {
- let OtherPredicates = [HasTrue16BitInsts];
+def : ClassPat_t16<V_CMP_CLASS_F16_fake16_e64, f16> {
+ let True16Predicate = UseFakeTrue16Insts;
}
def : ClassPat<V_CMP_CLASS_F32_e64, f32>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index b5f91b6b860831..55015c6d13d8af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -30,8 +30,8 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
@@ -68,8 +68,8 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index a67a0b6455fac9..4241f945a87d53 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -30,8 +30,8 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
+ ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
@@ -68,8 +68,8 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
+ ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index e994f42e0d60d6..d45bc31a127291 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -20,6 +20,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -27,13 +28,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_sv
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -59,6 +61,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -66,13 +69,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vs
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -98,6 +102,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -105,13 +110,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -137,6 +143,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ne_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -144,13 +151,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ne_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -176,6 +184,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_slt_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -183,13 +192,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_slt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -215,6 +225,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_sle_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -222,13 +233,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_sle_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -254,6 +266,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ult_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -261,13 +274,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ult_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -293,6 +307,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ule_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -300,13 +315,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ule_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 30a24c675a76b6..5d90bab1384eb8 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -12,8 +12,8 @@ body: |
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_fake16_e64_]], implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
index be759049bc3a7d..245c5e1005d08f 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
@@ -11,8 +11,8 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_shrink
; GFX1100: liveins: $vgpr127
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: V_CMP_EQ_U16_fake16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
...
---
@@ -24,6 +24,6 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_no_shrink
; GFX1100: liveins: $vgpr128
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..656c849bbd56b6 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -18,15 +18,15 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ ; GCN-NEXT: V_CMPX_EQ_I16_fake16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_CMP_GE_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_fake16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, [[V_CMP_NGE_F16_fake16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
@@ -40,13 +40,13 @@ body: |
; unsafe to combine cmpx
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
+ V_CMPX_EQ_I16_fake16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %7:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %6, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
+ %9:sgpr_32 = V_CMP_GE_F16_fake16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -61,7 +61,7 @@ body: |
; do not shrink True16 instructions
%15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
+ %16:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
%17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -89,7 +89,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, [[V_MOV_B32_dpp]], 0, [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
@@ -100,7 +100,7 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
- %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %99:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %4, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec