[llvm] [AMDGPU][True16][MC] update VOPC profile with latest vop3 true16 (PR #113175)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 08:09:52 PDT 2024
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/113175
f16 for fake16 format
>From 6326dc2262505580f8a48375949cc26e7243d67b Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 21 Oct 2024 11:09:07 -0400
Subject: [PATCH] [AMDGPU][True16][MC] update VOPC profile with latest vop3
true16, use f16 for fake16 format
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 85 ++++++++++-----
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 62 ++++++++---
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 100 +++++++++++-------
llvm/lib/Target/AMDGPU/VOPInstructions.td | 9 +-
.../GlobalISel/inst-select-amdgcn.class.mir | 30 +++---
.../inst-select-amdgcn.class.s16.mir | 15 +--
.../AMDGPU/GlobalISel/inst-select-brcond.mir | 2 +-
.../GlobalISel/inst-select-icmp.s16.mir | 48 ++++++---
.../AMDGPU/fix-sgpr-copies-f16-fake16.mir | 20 ++++
llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 8 +-
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 24 ++---
12 files changed, 267 insertions(+), 138 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 800bdbe04cf70d..e1c780c33e9678 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1104,10 +1104,13 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
if (Size == 16 && !ST.has16BitInsts())
return -1;
- const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
+ const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
+ unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
- return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
+ return ST.hasTrue16BitInsts()
+ ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ : S16Opc;
if (Size == 32)
return S32Opc;
return S64Opc;
@@ -1118,83 +1121,109 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
llvm_unreachable("Unknown condition code!");
case CmpInst::ICMP_NE:
return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
- AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
+ AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
+ AMDGPU::V_CMP_NE_U64_e64);
case CmpInst::ICMP_EQ:
return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
- AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
+ AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
+ AMDGPU::V_CMP_EQ_U64_e64);
case CmpInst::ICMP_SGT:
return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
- AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
+ AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
+ AMDGPU::V_CMP_GT_I64_e64);
case CmpInst::ICMP_SGE:
return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
- AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
+ AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
+ AMDGPU::V_CMP_GE_I64_e64);
case CmpInst::ICMP_SLT:
return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
- AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
+ AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
+ AMDGPU::V_CMP_LT_I64_e64);
case CmpInst::ICMP_SLE:
return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
- AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
+ AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
+ AMDGPU::V_CMP_LE_I64_e64);
case CmpInst::ICMP_UGT:
return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
- AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
+ AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
+ AMDGPU::V_CMP_GT_U64_e64);
case CmpInst::ICMP_UGE:
return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
- AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
+ AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
+ AMDGPU::V_CMP_GE_U64_e64);
case CmpInst::ICMP_ULT:
return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
- AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
+ AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
+ AMDGPU::V_CMP_LT_U64_e64);
case CmpInst::ICMP_ULE:
return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
- AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
+ AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
+ AMDGPU::V_CMP_LE_U64_e64);
case CmpInst::FCMP_OEQ:
return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
- AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
+ AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
+ AMDGPU::V_CMP_EQ_F64_e64);
case CmpInst::FCMP_OGT:
return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
- AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
+ AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
+ AMDGPU::V_CMP_GT_F64_e64);
case CmpInst::FCMP_OGE:
return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
- AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
+ AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
+ AMDGPU::V_CMP_GE_F64_e64);
case CmpInst::FCMP_OLT:
return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
- AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
+ AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
+ AMDGPU::V_CMP_LT_F64_e64);
case CmpInst::FCMP_OLE:
return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
- AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
+ AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
+ AMDGPU::V_CMP_LE_F64_e64);
case CmpInst::FCMP_ONE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_ORD:
return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
- AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
+ AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
+ AMDGPU::V_CMP_O_F64_e64);
case CmpInst::FCMP_UNO:
return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
- AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
+ AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
+ AMDGPU::V_CMP_U_F64_e64);
case CmpInst::FCMP_UEQ:
return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
- AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
+ AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
+ AMDGPU::V_CMP_NLG_F64_e64);
case CmpInst::FCMP_UGT:
return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
- AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
+ AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
+ AMDGPU::V_CMP_NLE_F64_e64);
case CmpInst::FCMP_UGE:
return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
- AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
+ AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
+ AMDGPU::V_CMP_NLT_F64_e64);
case CmpInst::FCMP_ULT:
return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
- AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
+ AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
+ AMDGPU::V_CMP_NGE_F64_e64);
case CmpInst::FCMP_ULE:
return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
- AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
+ AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
+ AMDGPU::V_CMP_NGT_F64_e64);
case CmpInst::FCMP_UNE:
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
- AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
+ AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
+ AMDGPU::V_CMP_NEQ_F64_e64);
case CmpInst::FCMP_TRUE:
return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
- AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
+ AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
+ AMDGPU::V_CMP_TRU_F64_e64);
case CmpInst::FCMP_FALSE:
return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
- AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
+ AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
+ AMDGPU::V_CMP_F_F64_e64);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 89a2eb4f18946b..fce88860aeea42 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5501,20 +5501,48 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
- case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
- case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
- case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
- case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
- case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
- case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
- case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
- case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
- case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
- case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
- case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
- case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
- case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
- case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
+ case AMDGPU::S_CMP_LT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
+ : AMDGPU::V_CMP_LT_F16_fake16_e64;
+ case AMDGPU::S_CMP_EQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
+ : AMDGPU::V_CMP_EQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_LE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
+ : AMDGPU::V_CMP_LE_F16_fake16_e64;
+ case AMDGPU::S_CMP_GT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
+ : AMDGPU::V_CMP_GT_F16_fake16_e64;
+ case AMDGPU::S_CMP_LG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
+ : AMDGPU::V_CMP_LG_F16_fake16_e64;
+ case AMDGPU::S_CMP_GE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
+ : AMDGPU::V_CMP_GE_F16_fake16_e64;
+ case AMDGPU::S_CMP_O_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
+ : AMDGPU::V_CMP_O_F16_fake16_e64;
+ case AMDGPU::S_CMP_U_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
+ : AMDGPU::V_CMP_U_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
+ : AMDGPU::V_CMP_NGE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLG_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
+ : AMDGPU::V_CMP_NLG_F16_fake16_e64;
+ case AMDGPU::S_CMP_NGT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
+ : AMDGPU::V_CMP_NGT_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLE_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
+ : AMDGPU::V_CMP_NLE_F16_fake16_e64;
+ case AMDGPU::S_CMP_NEQ_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
+ : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
+ case AMDGPU::S_CMP_NLT_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
+ : AMDGPU::V_CMP_NLT_F16_fake16_e64;
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
@@ -7343,14 +7371,16 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
auto NewInstr =
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
.setMIFlags(Inst.getFlags());
- if (AMDGPU::getNamedOperandIdx(NewOpcode,
- AMDGPU::OpName::src0_modifiers) >= 0) {
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0_modifiers)) {
NewInstr
.addImm(0) // src0_modifiers
.add(Inst.getOperand(0)) // src0
.addImm(0) // src1_modifiers
.add(Inst.getOperand(1)) // src1
.addImm(0); // clamp
+
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
+ NewInstr.addImm(0); // op_sel0
} else {
NewInstr
.add(Inst.getOperand(0))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index faa0b6d6c3f506..2f3b3370c3393e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3463,7 +3463,7 @@ def : GCNPat <
SRCMODS.NONE,
(V_MOV_B64_PSEUDO (i64 0x3fefffffffffffff))),
$x,
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))))
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, SRCMODS.NONE, (i32 3 /*NaN*/))))
>;
} // End SubtargetPredicates = isGFX6
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index d6e08dce130ced..08881792b59847 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -192,6 +192,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily, string asm_name = ps.Pseudo
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let True16Predicate = ps.True16Predicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -314,7 +316,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
+ def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret, 0/*IsVOP3P*/, P.HasOpSel>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<1, opName#"_e64">,
VCMPVCMPXTable<opName#"_e64"> {
@@ -373,7 +375,7 @@ multiclass VOPCX_Pseudos <string opName,
let IsVCMPX = 1;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
VCMPXNoSDstTable<0, opName#"_e64">,
VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e64")> {
@@ -801,24 +803,11 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
VOPC_Profile<sched, src0VT, src1VT> {
- let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
- let AsmDPP16 = AsmDPP#"$fi";
- let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
- // DPP8 forbids modifiers and can inherit from VOPC_Profile
-
- let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
- let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
- (ins)));
- let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
-
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
- let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
}
@@ -837,16 +826,26 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}
- def _fake16 : VOPC_Class_Profile<sched, f16, i16> {
+ def _fake16 : VOPC_Class_Profile<sched, f16, f16> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+
}
}
@@ -889,17 +888,34 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
}
}
-class getVOPCClassPat64 <VOPProfile P> {
- list<dag> ret =
- [(set i1:$sdst,
+multiclass VOPCClassPat64<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
(AMDGPUfp_class
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
- i32:$src1))];
+ P.Src1VT:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0,
+ 0 /*src1_modifiers*/, P.Src1VT:$src1)
+ >;
+}
+multiclass VOPCClassPat64_fake16<string inst_name> {
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
+ defvar P = inst.Pfl;
+ def : GCNPat <
+ (i1:$sdst
+ (AMDGPUfp_class
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
+ i32:$src1)),
+ (inst i32:$src0_modifiers, P.Src0VT:$src0,
+ 0 /*src1_modifiers*/, VGPR_32:$src1)
+ >;
}
-
-// Special case for class instructions which only have modifiers on
-// the 1st source operand.
+// cmp_class ignores the FP mode and faithfully reports the unmodified
+// source value.
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
bit DefVcc = 1> {
def _e32 : VOPC_Pseudo <opName, p>,
@@ -910,7 +926,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let isConvergent = DefExec;
}
- def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
+ def _e64 : VOP3_Pseudo<opName, p, [], 0/*IsVOP3P*/, p.HasOpSel>,
VCMPXNoSDstTable<1, opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
@@ -957,7 +973,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
- def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
VCMPXNoSDstTable<0, opName#"_e64"> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -990,6 +1006,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
} // end SubtargetPredicate = isGFX11Plus
}
} // End SubtargetPredicate = HasSdstCMPX
+} // End ReadsModeReg = 0, mayRaiseFPException = 0
defm VOPC_I1_F16_I16 : VOPC_Class_Profile_t16<[Write32Bit]>;
def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
@@ -1002,12 +1019,14 @@ def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
multiclass VOPC_CLASS_F16 <string opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F16_I16, 0>;
+ defm : VOPCClassPat64<NAME>;
}
- let OtherPredicates = [UseRealTrue16Insts] in {
+ let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
}
- let OtherPredicates = [UseFakeTrue16Insts] in {
+ let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
+ defm : VOPCClassPat64_fake16<NAME>;
}
}
@@ -1023,21 +1042,22 @@ multiclass VOPCX_CLASS_F16 <string opName> {
}
}
-multiclass VOPC_CLASS_F32 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+multiclass VOPC_CLASS_F32 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
multiclass VOPCX_CLASS_F32 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
-multiclass VOPC_CLASS_F64 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+multiclass VOPC_CLASS_F64 <string opName> {
+ defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+ defm : VOPCClassPat64<NAME>;
+}
multiclass VOPCX_CLASS_F64 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
-// cmp_class ignores the FP mode and faithfully reports the unmodified
-// source value.
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
@@ -1045,7 +1065,6 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
-} // End ReadsModeReg = 0, mayRaiseFPException = 0
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
@@ -1283,11 +1302,13 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let Constraints = ps.Constraints;
}
@@ -1303,6 +1324,7 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
let Constraints = "";
}
@@ -1333,6 +1355,7 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
+ let True16Predicate = ps.True16Predicate;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1375,6 +1398,7 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let True16Predicate = ps.True16Predicate;
}
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index aab5dc7465d938..37d6c30be0695f 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1873,15 +1873,16 @@ include "VOPDInstructions.td"
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
- (inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
+ (inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
>;
def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
- let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];
+ let OtherPredicates = [Has16BitInsts];
+ let True16Predicate = NotHasTrue16BitInsts;
}
-def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {
- let OtherPredicates = [HasTrue16BitInsts];
+def : ClassPat<V_CMP_CLASS_F16_fake16_e64, f16> {
+ let True16Predicate = UseFakeTrue16Insts;
}
def : ClassPat<V_CMP_CLASS_F32_e64, f32>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
index 46c801b5738e11..41d9647b4a23e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir
@@ -16,14 +16,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
@@ -45,14 +46,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
@@ -74,14 +76,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
+ ;
; WAVE32-LABEL: name: class_s32_vcc_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -103,14 +106,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_sv
; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -133,14 +137,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_vs
; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:sgpr(s32) = COPY $sgpr0
@@ -163,14 +168,15 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
+ ;
; WAVE32-LABEL: name: class_s64_vcc_vv
; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
index 3ca3928fbfad30..c1f334c5ec043a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir
@@ -24,14 +24,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_sv
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
@@ -54,14 +55,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_vs
; WAVE64: liveins: $sgpr0, $vgpr0
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
@@ -84,14 +86,15 @@ body: |
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
+ ;
; WAVE64-LABEL: name: class_s16_vcc_vv
; WAVE64: liveins: $vgpr0, $vgpr1
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
index ecb07f79e9fd19..771c4e9ec36474 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -208,7 +208,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
; GCN-NEXT: $vcc = COPY [[V_CMP_CLASS_F32_e64_]]
; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; GCN-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index e994f42e0d60d6..d45bc31a127291 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -20,6 +20,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_sv
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -27,13 +28,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_sv
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -59,6 +61,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vs
; WAVE32: liveins: $sgpr0, $vgpr0
; WAVE32-NEXT: {{ $}}
@@ -66,13 +69,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vs
; GFX11: liveins: $sgpr0, $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -98,6 +102,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_eq_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -105,13 +110,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_eq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -137,6 +143,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ne_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -144,13 +151,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ne_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -176,6 +184,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_slt_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -183,13 +192,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_slt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -215,6 +225,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_sle_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -222,13 +233,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_sle_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -254,6 +266,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ult_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -261,13 +274,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ult_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -293,6 +307,7 @@ body: |
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; WAVE32-LABEL: name: icmp_ule_s16_vv
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: {{ $}}
@@ -300,13 +315,14 @@ body: |
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
+ ;
; GFX11-LABEL: name: icmp_ule_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 30a24c675a76b6..2894bb3d588df7 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -24,6 +24,26 @@ body: |
...
# Needs extra shift instruction to select hi 16 bits
+---
+name: cmp_f16
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: cmp_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_fake16_e64_]], implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %3:sreg_32 = COPY %2:vgpr_32
+ nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
+ %4:sreg_32_xm0_xexec = COPY $scc
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
+...
+
---
name: cvt_hi_f32_f16
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
index be759049bc3a7d..245c5e1005d08f 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
@@ -11,8 +11,8 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_shrink
; GFX1100: liveins: $vgpr127
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: V_CMP_EQ_U16_fake16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
...
---
@@ -24,6 +24,6 @@ body: |
; GFX1100-LABEL: name: 16bit_lo128_no_shrink
; GFX1100: liveins: $vgpr128
; GFX1100-NEXT: {{ $}}
- ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
- $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
+ ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 123893674ff5e9..7521722e33b956 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -18,15 +18,15 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ ; GCN-NEXT: V_CMPX_EQ_I16_fake16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_CMP_GE_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_fake16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
- ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+ ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, [[V_CMP_NGE_F16_fake16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
@@ -40,20 +40,20 @@ body: |
; unsafe to combine cmpx
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
+ V_CMPX_EQ_I16_fake16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %7:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %6, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
+ %9:sgpr_32 = V_CMP_GE_F16_fake16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
+ %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, 0, %0, implicit $mode, implicit $exec
; shrink
%13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -61,7 +61,7 @@ body: |
; do not shrink True16 instructions
%15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
- %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
+ %16:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
%17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
@@ -89,7 +89,7 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
- ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, [[V_MOV_B32_dpp]], 0, [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
@@ -100,7 +100,7 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
- %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+ %99:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %4, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
More information about the llvm-commits
mailing list